lucene

一,什么是lucene

  lucene是一个全文检索工具包,原理类似于字典

二,原理

  全文检索算法(倒排索引算法)
        描述:先把数据库中的所有内容查询出来并进行分词,用切分出的词组成索引(相当于目录),把原始内容放到文档对象中,索引与文档共同组成索引库;检索时先在索引中查到关键词,再通过索引与文档之间的对应关系快速定位并返回文档,这就是倒排索引算法.
        缺点:空间换时间
        优点:查询效率高,不会随着数据的大量增长而效率明显降低
        举例:字典:把所有字的偏旁部首都取出来组成目录,目录与后面的正文内容相对应,通过目录就能快速地找到某个字的详细解释


三,代码

  (1)引入依赖

<dependencies>
        <dependency>
            <groupId>mysql</groupId>
            <artifactId>mysql-connector-java</artifactId>
            <version>5.1.35</version>
        </dependency>
        <dependency>
            <groupId>junit</groupId>
            <artifactId>junit</artifactId>
            <version>4.12</version>
        </dependency>
        <!-- Lucene analyzers -->
        <dependency>
            <groupId>org.apache.lucene</groupId>
            <artifactId>lucene-analyzers-common</artifactId>
            <version>4.10.3</version>
        </dependency>
        <!-- Lucene core -->
        <dependency>
            <groupId>org.apache.lucene</groupId>
            <artifactId>lucene-core</artifactId>
            <version>4.10.3</version>
        </dependency>
        <!-- Query parser; kept on the same version as lucene-core so all Lucene modules match -->
        <dependency>
            <groupId>org.apache.lucene</groupId>
            <artifactId>lucene-queryparser</artifactId>
            <version>4.10.3</version>
        </dependency>
        <!-- IK Chinese analyzer -->
        <dependency>
            <groupId>com.janeluo</groupId>
            <artifactId>ikanalyzer</artifactId>
            <version>2012_u6</version>
        </dependency>

    </dependencies>
lucene依赖

   (2)创建索引库

package com.myjava;

import com.myjava.dao.BookDao;
import com.myjava.domain.Book;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import org.wltea.analyzer.lucene.IKAnalyzer;

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

public class Test {
    /**
     * Creates the index: loads all books, converts each one into a Lucene
     * {@link Document}, and writes the documents to the index directory
     * {@code d:/dic} using the IK analyzer.
     *
     * @throws Exception if loading the books or writing the index fails
     */
    @org.junit.Test
    public void test() throws Exception {
        // 1. Load all records (presumably from the database -- see BookDao).
        List<Book> bookList = new BookDao().findAll();

        // 2. Put the content of every record into a document object.
        List<Document> docList = new ArrayList<Document>();
        for (Book book : bookList) {
            docList.add(toDocument(book));
        }

        // 3-5. Open the index location, the analyzer and the index writer.
        // try-with-resources closes them in reverse order even when indexing
        // fails, so the index write lock is not left behind.
        try (FSDirectory directory = FSDirectory.open(new File("d:/dic"));
             Analyzer analyzer = new IKAnalyzer();
             IndexWriter writer = new IndexWriter(
                     directory, new IndexWriterConfig(Version.LUCENE_4_10_3, analyzer))) {
            // 5.2 Write every document into the index.
            for (Document document : docList) {
                writer.addDocument(document);
            }
            writer.commit();
        }
    }

    /**
     * Maps one book to a document; each property of the book becomes one
     * stored text field of the same name.
     *
     * @param book the book to convert
     * @return a document with the fields id, name, price, pic and description
     */
    private static Document toDocument(Book book) {
        Document document = new Document();
        document.add(new TextField("id", book.getId(), Field.Store.YES));
        document.add(new TextField("name", book.getName(), Field.Store.YES));
        document.add(new TextField("price", String.valueOf(book.getPrice()), Field.Store.YES));
        document.add(new TextField("pic", book.getPic(), Field.Store.YES));
        document.add(new TextField("description", book.getDescription(), Field.Store.YES));
        return document;
    }
}
创建索引库

    (3.0)查询索引

package com.myjava;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import org.junit.Test;

import java.io.File;
import java.io.IOException;

public class SearchIndexTest {

    /**
     * Searches the index in {@code d:/dic} for {@code description:java} and
     * prints the stored fields of the top 5 hits.
     *
     * @throws Exception if the index cannot be opened or the query cannot be parsed
     */
    @Test
    public void test() throws Exception {
        // The directory, reader and analyzer are closed by try-with-resources,
        // so the file handles are released even if the search fails.
        try (FSDirectory fsDirectory = FSDirectory.open(new File("d:/dic"));
             IndexReader reader = IndexReader.open(fsDirectory);
             // NOTE(review): the index-creation example uses IKAnalyzer; the query
             // analyzer normally should match the one used at index time -- confirm.
             Analyzer analyzer = new StandardAnalyzer()) {
            // The searcher is the central object for querying the index.
            IndexSearcher indexSearcher = new IndexSearcher(reader);

            // "name" is the default field; the query string names its field explicitly.
            QueryParser queryParser = new QueryParser("name", analyzer);
            Query query = queryParser.parse("description:java");
            TopDocs topDocs = indexSearcher.search(query, 5);

            // Each ScoreDoc carries the internal id of a matching document.
            for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
                // Load the stored fields of the hit by its document id.
                Document document = indexSearcher.doc(scoreDoc.doc);
                System.out.println("id:"+document.get("id"));
                System.out.println("name:"+document.get("name"));
                System.out.println("pic:"+document.get("pic"));
                System.out.println("price:"+document.get("price"));
                System.out.println("description:"+document.get("description"));
            }
        }
    }
}
查询索引

   (3.1)删除索引

package com.myjava;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.Query;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import org.junit.Test;

import java.io.File;
import java.io.IOException;

public class DeleteIndexTest {

    /**
     * Deletes only the documents that match the query {@code id:2}; the rest
     * of the index is left untouched.
     *
     * <p>{@code IndexWriter.deleteDocuments} also accepts a {@code Term}, e.g.
     * {@code new Term("id", "1")}, as an alternative to a parsed query.
     *
     * @throws Exception if the index cannot be opened or the query cannot be parsed
     */
    @Test
    public void test() throws Exception {
        try (FSDirectory directory = FSDirectory.open(new File("d:/dic"));
             IndexWriter indexWriter = openWriter(directory)) {
            QueryParser queryParser = new QueryParser("name",new StandardAnalyzer());
            Query query = queryParser.parse("id:2");

            // Removes the matching documents only.
            indexWriter.deleteDocuments(query);
            indexWriter.commit();
        }
    }

    /**
     * Deletes every document in the index. Kept separate from {@link #test()}
     * so that the targeted delete does not also wipe the whole index.
     *
     * @throws Exception if the index cannot be opened or written
     */
    @Test
    public void testDeleteAll() throws Exception {
        try (FSDirectory directory = FSDirectory.open(new File("d:/dic"));
             IndexWriter indexWriter = openWriter(directory)) {
            // Removes all documents together with their index entries.
            indexWriter.deleteAll();
            indexWriter.commit();
        }
    }

    /** Opens an index writer on the given directory; the caller must close it. */
    private static IndexWriter openWriter(FSDirectory directory) throws IOException {
        IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_4_10_3,new StandardAnalyzer());
        return new IndexWriter(directory,config);
    }
}
删除索引

   (3.2)更新索引

package com.myjava;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import org.junit.Test;

import java.io.File;
import java.io.IOException;

public class UpdateIndexTest {

    /**
     * Replaces the document(s) whose {@code id} field contains the term
     * {@code "2"} with a newly built document. An update first deletes the
     * documents matching the term and then adds the new document.
     *
     * @throws Exception if the index cannot be opened or written
     */
    @Test
    public void test() throws Exception {
        // try-with-resources closes the writer, analyzer and directory even if
        // the update fails, so the index write lock is always released.
        try (FSDirectory fsDirectory = FSDirectory.open(new File("d:/dic"));
             Analyzer analyzer = new StandardAnalyzer();
             IndexWriter indexWriter = new IndexWriter(
                     fsDirectory, new IndexWriterConfig(Version.LUCENE_4_10_3,analyzer))) {

            // Selects the document(s) to be replaced.
            Term term = new Term("id","2");

            // The replacement document.
            Document document = new Document();
            document.add(new TextField("id","6", Field.Store.YES));
            document.add(new TextField("name","MDZZ", Field.Store.YES));
            document.add(new TextField("price","998", Field.Store.YES));
            document.add(new TextField("pic","daushd.jpg", Field.Store.YES));
            document.add(new TextField("description","hhhhhhhhhh", Field.Store.YES));

            indexWriter.updateDocument(term,document);
            indexWriter.commit();
        }
    }
}
更新索引

   (4)ik中文分词器对象

// Create the IK Chinese analyzer and hold it through the Analyzer base type.
Analyzer analyzer = new IKAnalyzer();

   注意:使用ik中文分词器需要导入配置文件

 

四,域对象的选择

  域对象的选择主要取决于三个问题

     1.是否分词:分词的目的就是索引,分词后是否有意义,如果有意义,则分词,无意义,则不分词

    2.是否索引:查询该对象时是否需要索引来查询

      3.是否存储: 是否存储到索引库中, 在查询页面需要展示就需要存储,不需要展示则不需要存储

  

  注意:如果需要对某个域进行区间(范围)检索,则该域必须分词,必须索引,必须存储,这是lucene的底层规则

    

 

  

posted @ 2019-12-16 18:18  zddsl  阅读(212)  评论(0)    收藏  举报