IK Analyzer 简单使用
Lucene 是使用 Java 语言开发的全文检索引擎;IK Analyzer 是一个开源的、基于 Java 语言开发的轻量级中文分词工具包。下面的测试代码使用 Lucene-xxx-4.9.jar 和 IKAnalyzer-2012_FF.jar。
1. 创建Index
package test; import java.io.File; import java.io.IOException; import java.util.List; import org.apache.lucene.document.Document; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.index.IndexWriterConfig.OpenMode; import org.apache.lucene.index.Term; import org.apache.lucene.store.Directory; import org.apache.lucene.store.FSDirectory; import org.apache.lucene.util.Version; import org.wltea.analyzer.lucene.IKAnalyzer; public abstract class AbstractLuceneIndex<E> { private String path = "index"; private IKAnalyzer analyzer = new IKAnalyzer(); public AbstractLuceneIndex(){} public AbstractLuceneIndex(String path) { this.path = path; } private Directory openDirectory() { File file = new File(path); if (!file.exists()) { file.mkdirs(); } if (!file.isDirectory()) { file.mkdirs(); } if (!file.canWrite()) { file.setWritable(true); } Directory dir = null; try { dir = FSDirectory.open(file); } catch (IOException e) { e.printStackTrace(); } return dir; } public void addDoc(E doc) { Directory dir = openDirectory(); if (dir == null) { System.out.println("Wrong !"); System.exit(1); } IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_4_9, analyzer); iwc.setOpenMode(OpenMode.CREATE_OR_APPEND); try { IndexWriter iwriter = new IndexWriter(dir, iwc); iwriter.addDocument(setDoc(doc)); iwriter.close(); } catch (IOException e) { e.printStackTrace(); } } public void addDocList(List<E> docs) { if (docs == null) return; Directory dir = openDirectory(); if (dir == null) { System.out.println("Wrong !"); System.exit(1); } IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_4_9, analyzer); iwc.setOpenMode(OpenMode.CREATE_OR_APPEND); try { IndexWriter iwriter = new IndexWriter(dir, iwc); for(E doc : docs) { iwriter.addDocument(setDoc(doc)); } iwriter.close(); } catch (IOException e) { e.printStackTrace(); } } public void updateDoc(String id, E data) throws IOException { Directory dir = this.openDirectory(); 
IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_4_9, analyzer); iwc.setOpenMode(OpenMode.CREATE_OR_APPEND); IndexWriter writer = new IndexWriter(dir, iwc); Document doc = setDoc(data); writer.updateDocument(new Term("pid", id), doc); writer.close(); } public void deleteDoc(String id) throws IOException { Directory dir = this.openDirectory(); IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_4_9, analyzer); IndexWriter writer = new IndexWriter(dir, iwc); writer.deleteDocuments(new Term("pid", id)); writer.close(); } public void deleteAll() throws IOException { Directory dir = this.openDirectory(); IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_4_9, analyzer); IndexWriter writer = new IndexWriter(dir, iwc); writer.deleteAll(); writer.close(); } public abstract Document setDoc(E doc); }
实现:
package test; import org.apache.lucene.document.Document; import org.apache.lucene.document.DoubleField; import org.apache.lucene.document.Field.Store; import org.apache.lucene.document.StringField; import org.apache.lucene.document.TextField; import dto.ProductBaseInfo; public class ProductLuceneIndex extends AbstractLuceneIndex<ProductBaseInfo>{ public ProductLuceneIndex() {} public ProductLuceneIndex(String path) { super(path); } @Override public Document setDoc(ProductBaseInfo info) { Document doc = new Document(); doc.add(new StringField("pid", info.getPid(), Store.YES)); doc.add(new TextField("name", info.getName(), Store.YES)); doc.add(new DoubleField("price", info.getPrice(), Store.YES)); doc.add(new TextField("detail", info.getDetail(), Store.YES)); return doc; } }
2. 查询Index
package test;

import java.util.List;

import org.wltea.analyzer.lucene.IKAnalyzer;

/**
 * Base class for keyword searches against a Lucene index.
 *
 * <p>Holds the index location and the analyzer that subclasses use when implementing
 * {@link #SearchFromIndex(String)}.
 *
 * @param <E> type of the objects returned by a search
 */
public abstract class AbstactLuceneSearch<E> {

    /** File-system location of the index; {@code "index"} unless a path is given. */
    protected String path = "index";

    /** Analyzer available to subclasses for parsing queries. */
    protected IKAnalyzer analyzer = new IKAnalyzer();

    /** Keeps the default index location. */
    public AbstactLuceneSearch() {
    }

    /**
     * @param path file-system location of the index
     */
    public AbstactLuceneSearch(String path) {
        this.path = path;
    }

    /**
     * Searches the index for the given keyword.
     *
     * @param keyword text to search for
     * @return the matching objects, as defined by the implementing subclass
     */
    public abstract List<E> SearchFromIndex(String keyword);
}
实现:
package test; import java.io.File; import java.io.IOException; import java.util.ArrayList; import java.util.List; import org.apache.lucene.document.Document; import org.apache.lucene.index.DirectoryReader; import org.apache.lucene.index.IndexReader; import org.apache.lucene.queryparser.classic.MultiFieldQueryParser; import org.apache.lucene.queryparser.classic.ParseException; import org.apache.lucene.queryparser.classic.QueryParser; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Query; import org.apache.lucene.search.ScoreDoc; import org.apache.lucene.search.TopDocs; import org.apache.lucene.store.FSDirectory; import org.apache.lucene.util.Version; import dto.ProductBaseInfo; public class ProductLuceneSearch extends AbstactLuceneSearch<ProductBaseInfo> { public ProductLuceneSearch(String path) { super(path); } @Override public List<ProductBaseInfo> SearchFromIndex(String keywords) { try { IndexReader ireader = DirectoryReader.open(FSDirectory.open(new File(path))); IndexSearcher isearcher = new IndexSearcher(ireader); MultiFieldQueryParser parser = new MultiFieldQueryParser( Version.LUCENE_4_9, new String[] { "name", "price", "detail" }, this.analyzer); keywords = QueryParser.escape(keywords); Query query = parser.parse(keywords); // search one result TopDocs results = isearcher.search(query, 1); ScoreDoc[] hits = results.scoreDocs; int numTotalHits = results.totalHits; System.out.println("total matching documents :" + numTotalHits); List<ProductBaseInfo> datas = new ArrayList<ProductBaseInfo>(); for (int i = 0; i < hits.length; i++) { Document doc = isearcher.doc(hits[i].doc); ProductBaseInfo data = new ProductBaseInfo(); data.setPid(doc.get("pid")); data.setName(doc.get("name")); data.setPrice(Double.parseDouble(doc.get("price"))); data.setDetail(doc.get("detail")); datas.add(data); } ireader.close(); return datas; } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } catch (ParseException e) { // TODO 
Auto-generated catch block e.printStackTrace(); } return null; } }
3. 要查询的对象(Object)
package dto; public class ProductBaseInfo { private String pid; private String name; private double price; private String detail; public String getPid() { return pid; } public void setPid(String pid) { this.pid = pid; } public String getName() { return name; } public void setName(String name) { this.name = name; } public double getPrice() { return price; } public void setPrice(double price) { this.price = price; } public String getDetail() { return detail; } public void setDetail(String detail) { this.detail = detail; } public ProductBaseInfo() {} public ProductBaseInfo(String pid, String name, double price, String detail) { super(); this.pid = pid; this.name = name; this.price = price; this.detail = detail; } @Override public String toString() { return "ProductBaseInfo [pid=" + pid + ", name=" + name + ", price=" + price + ", detail=" + detail + "]"; } }
4. 测试代码
package test;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import org.junit.Test;

import dto.ProductBaseInfo;

/**
 * Manual JUnit walkthrough of the product index under {@code /tmp/index}:
 * add three products, search, replace one product, and wipe the index.
 */
public class TestLucene {

    ProductLuceneIndex index = new ProductLuceneIndex("/tmp/index");
    ProductLuceneSearch search = new ProductLuceneSearch("/tmp/index");

    /** Indexes three sample products in one batch. */
    @Test
    public void addIndex() {
        ProductBaseInfo luosifen = new ProductBaseInfo("1", "螺蛳粉", 3.8, "螺蛳粉是广西柳州市的小吃米粉,具有辣、爽、鲜、酸、烫的独特风味");
        ProductBaseInfo laoyoufen = new ProductBaseInfo("2", "老友粉", 3.8, "老友粉是广西南宁的本土美食,酸辣可口");
        ProductBaseInfo guilinMifen = new ProductBaseInfo("3", "桂林米粉", 3.8, "桂林米粉是历史悠久的小吃,好吃");

        List<ProductBaseInfo> products = new ArrayList<>();
        products.add(luosifen);
        products.add(laoyoufen);
        products.add(guilinMifen);

        index.addDocList(products);
    }

    /** Searches for "武汉" and prints the first hit, if there is one. */
    @Test
    public void searchIndex() {
        List<ProductBaseInfo> found = search.SearchFromIndex("武汉");
        if (found == null || found.size() <= 0) {
            return;
        }
        System.out.println("Search result: " + found.get(0).toString());
    }

    /** Replaces the product with pid "3" by a new product. */
    @Test
    public void updateIndex() throws IOException {
        ProductBaseInfo replacement = new ProductBaseInfo("3", "热干面", 3.8, "武汉热干面");
        index.updateDoc("3", replacement);
    }

    /** Removes every document from the index. */
    @Test
    public void deleteAllIndex() throws IOException {
        index.deleteAll();
    }
}
外链:Lucene五分钟教程
总结:第一篇博客,还不懂怎么排版;代码是拼凑出来的,理解还差得远,有待更新。

浙公网安备 33010602011771号