IK Analyzer 简单使用

   Lucene 是使用 Java 语言开发的全文检索引擎,IK Analyzer 是一个开源的、基于 Java 语言开发的轻量级中文分词工具包。下面的测试代码使用:Lucene-xxx-4.9.jar、IKAnalyzer-2012_FF.jar

  1. 创建Index

package test;

import java.io.File;
import java.io.IOException;
import java.util.List;

import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.index.Term;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import org.wltea.analyzer.lucene.IKAnalyzer;

/**
 * Base class for maintaining a file-system Lucene index analyzed with {@link IKAnalyzer}.
 *
 * <p>Every operation opens the index directory and a fresh {@link IndexWriter}, performs its
 * work, and closes both again, even when the operation fails. Subclasses only supply the
 * mapping from a domain object to a Lucene {@link Document} via {@link #setDoc(Object)}.
 *
 * @param <E> type of the domain object stored in the index
 */
public abstract class AbstractLuceneIndex<E> {

    /** Field whose term identifies a document for {@link #updateDoc} and {@link #deleteDoc}. */
    private static final String ID_FIELD = "pid";

    private final String path;
    private final IKAnalyzer analyzer = new IKAnalyzer();

    /** Creates an index rooted at the relative directory {@code "index"}. */
    public AbstractLuceneIndex() {
        this("index");
    }

    /**
     * Creates an index rooted at the given directory.
     *
     * @param path file-system path of the index directory; created on first use if missing
     */
    public AbstractLuceneIndex(String path) {
        this.path = path;
    }

    /**
     * Opens the index directory, creating it and making it writable when necessary.
     *
     * @return an open directory that the caller must close
     * @throws IOException if the directory cannot be created, made writable, or opened
     */
    private Directory openDirectory() throws IOException {
        File file = new File(path);

        // mkdirs() returns false when the directory already exists, so re-check afterwards
        // to tolerate another thread or process creating it concurrently.
        if (!file.isDirectory() && !file.mkdirs() && !file.isDirectory()) {
            throw new IOException("Cannot create index directory: " + file.getAbsolutePath());
        }

        if (!file.canWrite() && !file.setWritable(true)) {
            throw new IOException("Index directory is not writable: " + file.getAbsolutePath());
        }

        return FSDirectory.open(file);
    }

    /**
     * Builds the writer configuration shared by all operations. A new instance is created
     * for each writer, as the original per-method code did.
     */
    private IndexWriterConfig newWriterConfig() {
        IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_4_9, analyzer);
        config.setOpenMode(OpenMode.CREATE_OR_APPEND);
        return config;
    }

    /**
     * Adds a single document to the index.
     *
     * @param doc domain object to convert with {@link #setDoc(Object)} and index
     * @throws IllegalStateException if the index cannot be opened or written; the
     *         underlying {@link IOException} is preserved as the cause
     */
    public void addDoc(E doc) {
        // Resources are closed in reverse order: writer first, then directory.
        try (Directory dir = openDirectory();
                IndexWriter writer = new IndexWriter(dir, newWriterConfig())) {
            writer.addDocument(setDoc(doc));
        } catch (IOException e) {
            throw new IllegalStateException("Failed to add document to index at " + path, e);
        }
    }

    /**
     * Adds all given documents to the index using a single writer.
     *
     * <p>Closing the writer commits its changes, so if a failure occurs part-way through,
     * documents added before the failure may already be in the index.
     *
     * @param docs domain objects to index; {@code null} is ignored
     * @throws IllegalStateException if the index cannot be opened or written; the
     *         underlying {@link IOException} is preserved as the cause
     */
    public void addDocList(List<E> docs) {
        if (docs == null) {
            return;
        }

        try (Directory dir = openDirectory();
                IndexWriter writer = new IndexWriter(dir, newWriterConfig())) {
            for (E doc : docs) {
                writer.addDocument(setDoc(doc));
            }
        } catch (IOException e) {
            throw new IllegalStateException("Failed to add documents to index at " + path, e);
        }
    }

    /**
     * Replaces every document whose {@code pid} term equals {@code id} with {@code data}.
     *
     * @param id value of the {@code pid} field identifying the document(s) to replace
     * @param data domain object providing the new document content
     * @throws IOException if the index cannot be opened or written
     */
    public void updateDoc(String id, E data) throws IOException {
        try (Directory dir = openDirectory();
                IndexWriter writer = new IndexWriter(dir, newWriterConfig())) {
            writer.updateDocument(new Term(ID_FIELD, id), setDoc(data));
        }
    }

    /**
     * Deletes every document whose {@code pid} term equals {@code id}.
     *
     * @param id value of the {@code pid} field identifying the document(s) to delete
     * @throws IOException if the index cannot be opened or written
     */
    public void deleteDoc(String id) throws IOException {
        try (Directory dir = openDirectory();
                IndexWriter writer = new IndexWriter(dir, newWriterConfig())) {
            writer.deleteDocuments(new Term(ID_FIELD, id));
        }
    }

    /**
     * Deletes all documents from the index.
     *
     * @throws IOException if the index cannot be opened or written
     */
    public void deleteAll() throws IOException {
        try (Directory dir = openDirectory();
                IndexWriter writer = new IndexWriter(dir, newWriterConfig())) {
            writer.deleteAll();
        }
    }

    /**
     * Converts a domain object into the Lucene document to be indexed.
     *
     * <p>{@link #updateDoc} and {@link #deleteDoc} match on the {@code pid} field, so
     * implementations need to index that field for those operations to find the document.
     *
     * @param doc domain object to convert
     * @return the document to add to the index
     */
    public abstract Document setDoc(E doc);
}

实现:

package test;

import org.apache.lucene.document.Document;
import org.apache.lucene.document.DoubleField;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;

import dto.ProductBaseInfo;

/**
 * Index writer for {@link ProductBaseInfo} objects.
 *
 * <p>Maps a product onto a Lucene document with the stored fields {@code pid},
 * {@code name}, {@code price} and {@code detail}.
 */
public class ProductLuceneIndex extends AbstractLuceneIndex<ProductBaseInfo> {

    /** Creates an index at the default location chosen by the superclass. */
    public ProductLuceneIndex() {
        super();
    }

    /**
     * Creates an index at the given location.
     *
     * @param path file-system path of the index directory
     */
    public ProductLuceneIndex(String path) {
        super(path);
    }

    /**
     * Builds the Lucene document for one product.
     *
     * @param info product to convert
     * @return a document holding the product's id, name, price and detail, all stored
     */
    @Override
    public Document setDoc(ProductBaseInfo info) {
        final Document document = new Document();

        // Identifier: a StringField, i.e. not run through the analyzer.
        final StringField idField = new StringField("pid", info.getPid(), Store.YES);
        document.add(idField);

        // Free-text name.
        final TextField nameField = new TextField("name", info.getName(), Store.YES);
        document.add(nameField);

        // Numeric price.
        final DoubleField priceField = new DoubleField("price", info.getPrice(), Store.YES);
        document.add(priceField);

        // Free-text description.
        final TextField detailField = new TextField("detail", info.getDetail(), Store.YES);
        document.add(detailField);

        return document;
    }

}

  2. 查询Index

package test;


import java.util.List;

import org.wltea.analyzer.lucene.IKAnalyzer;

/**
 * Base class for searching a Lucene index with {@link IKAnalyzer}.
 *
 * <p>Holds the index location and the analyzer for subclasses; the actual query logic is
 * supplied by {@link #SearchFromIndex(String)}.
 *
 * @param <E> type of the objects returned by a search
 */
public abstract class AbstactLuceneSearch<E> {

    /** File-system path of the index directory; {@code "index"} unless overridden. */
    protected String path;

    /** Analyzer made available to subclasses. */
    protected IKAnalyzer analyzer = new IKAnalyzer();

    /** Creates a searcher for the relative directory {@code "index"}. */
    public AbstactLuceneSearch() {
        this("index");
    }

    /**
     * Creates a searcher for the given index directory.
     *
     * @param path file-system path of the index directory
     */
    public AbstactLuceneSearch(String path) {
        this.path = path;
    }

    /**
     * Searches the index for the given keyword.
     *
     * @param keyword text to search for
     * @return the matching objects, as defined by the implementing subclass
     */
    public abstract List<E> SearchFromIndex(String keyword);
}

实现:

package test;

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryparser.classic.MultiFieldQueryParser;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;

import dto.ProductBaseInfo;

/**
 * Searches the product index and converts hits back into {@link ProductBaseInfo} objects.
 *
 * <p>Each search opens the index directory and a reader and closes both again, even when
 * parsing or searching fails.
 */
public class ProductLuceneSearch extends AbstactLuceneSearch<ProductBaseInfo> {

    /** Number of hits returned by {@link #SearchFromIndex(String)}. */
    private static final int DEFAULT_MAX_RESULTS = 1;

    /**
     * Creates a searcher for the given index directory.
     *
     * @param path file-system path of the index directory
     */
    public ProductLuceneSearch(String path) {
        super(path);
    }

    /**
     * Searches for the given keywords and returns at most one (the best-scoring) product.
     *
     * @param keywords raw user input; query-syntax characters are escaped before parsing
     * @return the matching products (possibly empty), or {@code null} if the index could not
     *         be read or the query could not be parsed
     */
    @Override
    public List<ProductBaseInfo> SearchFromIndex(String keywords) {
        return SearchFromIndex(keywords, DEFAULT_MAX_RESULTS);
    }

    /**
     * Searches for the given keywords and returns up to {@code maxResults} products, in the
     * order returned by the searcher.
     *
     * @param keywords raw user input; query-syntax characters are escaped before parsing
     * @param maxResults maximum number of products to return; must be positive
     * @return the matching products (possibly empty), or {@code null} if the index could not
     *         be read or the query could not be parsed
     * @throws IllegalArgumentException if {@code maxResults} is not positive
     */
    public List<ProductBaseInfo> SearchFromIndex(String keywords, int maxResults) {
        if (maxResults <= 0) {
            throw new IllegalArgumentException("maxResults must be positive: " + maxResults);
        }

        // Resources are closed in reverse order: reader first, then directory.
        try (FSDirectory dir = FSDirectory.open(new File(path));
                IndexReader ireader = DirectoryReader.open(dir)) {
            IndexSearcher isearcher = new IndexSearcher(ireader);

            // NOTE(review): "price" is indexed as a numeric DoubleField by ProductLuceneIndex;
            // a text query against it presumably never matches - confirm and consider removing.
            MultiFieldQueryParser parser = new MultiFieldQueryParser(
                    Version.LUCENE_4_9, new String[] { "name", "price", "detail" },
                    this.analyzer);

            // Treat the input as literal text rather than as query syntax.
            Query query = parser.parse(QueryParser.escape(keywords));

            TopDocs results = isearcher.search(query, maxResults);
            ScoreDoc[] hits = results.scoreDocs;

            int numTotalHits = results.totalHits;
            System.out.println("total matching documents :" + numTotalHits);

            List<ProductBaseInfo> datas = new ArrayList<ProductBaseInfo>(hits.length);
            for (ScoreDoc hit : hits) {
                datas.add(toProduct(isearcher.doc(hit.doc)));
            }
            return datas;
        } catch (IOException | ParseException e) {
            // Existing callers rely on the null-on-failure contract, so it is kept.
            e.printStackTrace();
            return null;
        }
    }

    /** Rebuilds a product from the stored fields of a hit document. */
    private static ProductBaseInfo toProduct(Document doc) {
        ProductBaseInfo data = new ProductBaseInfo();
        data.setPid(doc.get("pid"));
        data.setName(doc.get("name"));
        data.setPrice(Double.parseDouble(doc.get("price")));
        data.setDetail(doc.get("detail"));
        return data;
    }

}

  3. 要查询的对象(Object)

package dto;

/**
 * Mutable data holder describing a product: identifier, name, price and detail text.
 */
public class ProductBaseInfo {

    private String pid;
    private String name;
    private double price;
    private String detail;

    /** Creates an empty product; reference fields are {@code null} and the price is 0.0. */
    public ProductBaseInfo() {
    }

    /**
     * Creates a fully populated product.
     *
     * @param pid product identifier
     * @param name product name
     * @param price product price
     * @param detail product description
     */
    public ProductBaseInfo(String pid, String name, double price, String detail) {
        this.pid = pid;
        this.name = name;
        this.price = price;
        this.detail = detail;
    }

    /** @return the product identifier */
    public String getPid() {
        return this.pid;
    }

    /** @param pid the product identifier */
    public void setPid(String pid) {
        this.pid = pid;
    }

    /** @return the product name */
    public String getName() {
        return this.name;
    }

    /** @param name the product name */
    public void setName(String name) {
        this.name = name;
    }

    /** @return the product price */
    public double getPrice() {
        return this.price;
    }

    /** @param price the product price */
    public void setPrice(double price) {
        this.price = price;
    }

    /** @return the product description */
    public String getDetail() {
        return this.detail;
    }

    /** @param detail the product description */
    public void setDetail(String detail) {
        this.detail = detail;
    }

    /**
     * Renders all four fields in the form
     * {@code ProductBaseInfo [pid=..., name=..., price=..., detail=...]}.
     */
    @Override
    public String toString() {
        StringBuilder text = new StringBuilder("ProductBaseInfo [pid=");
        text.append(pid);
        text.append(", name=").append(name);
        text.append(", price=").append(price);
        text.append(", detail=").append(detail);
        text.append("]");
        return text.toString();
    }
}

  4. 测试代码

  

package test;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import org.junit.Test;

import dto.ProductBaseInfo;

/**
 * Manual smoke tests for the product index and search classes.
 *
 * <p>None of these tests assert on results; they exercise the code against the index at
 * {@code /tmp/index} and print what they find.
 * NOTE(review): searchIndex looks for text that only updateIndex writes, so it presumably
 * finds something only after updateIndex has run - confirm the intended order.
 */
public class TestLucene {

    /** Location of the index shared by the writer and the searcher. */
    private static final String INDEX_PATH = "/tmp/index";

    ProductLuceneIndex index = new ProductLuceneIndex(INDEX_PATH);
    ProductLuceneSearch search = new ProductLuceneSearch(INDEX_PATH);

    /** Indexes three sample products in one batch. */
    @Test
    public void addIndex() {
        List<ProductBaseInfo> products = new ArrayList<>();
        products.add(new ProductBaseInfo("1", "螺蛳粉", 3.8, "螺蛳粉是广西柳州市的小吃米粉,具有辣、爽、鲜、酸、烫的独特风味"));
        products.add(new ProductBaseInfo("2", "老友粉", 3.8, "老友粉是广西南宁的本土美食,酸辣可口"));
        products.add(new ProductBaseInfo("3", "桂林米粉", 3.8, "桂林米粉是历史悠久的小吃,好吃"));

        index.addDocList(products);
    }

    /** Runs a keyword search and prints the first hit, if there is one. */
    @Test
    public void searchIndex() {
        List<ProductBaseInfo> found = search.SearchFromIndex("武汉");

        // Nothing to print when the search failed (null) or matched nothing.
        if (found == null || found.size() <= 0) {
            return;
        }
        System.out.println("Search result: " + found.get(0).toString());
    }

    /** Replaces the document with pid "3" by a new product. */
    @Test
    public void updateIndex() throws IOException {
        ProductBaseInfo replacement = new ProductBaseInfo("3", "热干面", 3.8, "武汉热干面");
        index.updateDoc("3", replacement);
    }

    /** Removes every document from the index. */
    @Test
    public void deleteAllIndex() throws IOException {
        index.deleteAll();
    }
}

外链:Lucene五分钟教程

总结:第一篇博客,还不懂怎么排版,代码是拼凑出来的,理解还差得远,有待更新。

 

posted @ 2015-11-29 16:17  航小蒋  阅读(639)  评论(0)    收藏  举报