Lucene 4.4 索引的增删改查
由于本人的英文不好,很多注释可能不够准确,请谅解!
package com.lucene.test; import java.io.File; import java.io.FileReader; import java.io.IOException; import java.util.Date; import org.apache.log4j.Logger; import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field.Store; import org.apache.lucene.document.IntField; import org.apache.lucene.document.StringField; import org.apache.lucene.document.TextField; import org.apache.lucene.index.DirectoryReader; import org.apache.lucene.index.DocsAndPositionsEnum; import org.apache.lucene.index.Fields; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.index.MultiFields; import org.apache.lucene.index.Term; import org.apache.lucene.index.Terms; import org.apache.lucene.index.TermsEnum; import org.apache.lucene.queryparser.classic.ParseException; import org.apache.lucene.queryparser.classic.QueryParser; import org.apache.lucene.search.DocIdSetIterator; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Query; import org.apache.lucene.search.ScoreDoc; import org.apache.lucene.search.TopDocs; import org.apache.lucene.store.Directory; import org.apache.lucene.store.FSDirectory; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.Version; public class IndexUtil { private static final Logger LOGGER = Logger.getLogger(IndexUtil.class); private Directory directory = null; private DirectoryReader reader = null; private IndexWriterConfig config = null; private IndexWriter writer = null; public static final IndexUtil Instance = new IndexUtil(); private IndexUtil() { try { directory = FSDirectory.open(new File("D:/lucene/index")); config = new IndexWriterConfig(Version.LUCENE_44, new StandardAnalyzer(Version.LUCENE_44)); } catch (IOException e) { e.printStackTrace(); } } /** * 添加索引 * @throws IOException */ public void index() throws IOException { writer = new IndexWriter(directory, config); 
File file = new File("D:\\lucene\\example"); Document document = null; int id = 0; long start = new Date().getTime(); LOGGER.info("添加索引…………………………"); for (File f : file.listFiles()) { document = new Document(); document.add(new StringField("name", f.getName(), Store.YES)); document.add(new IntField("id", id++, Store.YES)); document.add(new StringField("path", f.getAbsolutePath(), Store.YES)); document.add(new TextField("context", new FileReader(f))); writer.addDocument(document); } long end = new Date().getTime(); LOGGER.info("添加索引完成,用时:" + (end - start) / 1000.0 + "s…………………………"); writer.close(); } /** * 查询索引 * @throws IOException * @throws ParseException */ public void search() throws IOException, ParseException { reader = DirectoryReader.open(directory); QueryParser parser = new QueryParser(Version.LUCENE_44, "context", new StandardAnalyzer(Version.LUCENE_44)); Query query = parser.parse("lucene"); IndexSearcher searcher = new IndexSearcher(reader); TopDocs docs = searcher.search(query, 100); /** * reader.maxDoc()包含索引文档的总数 包含可用的 和已经删除的数量 * reader.numDocs()当前可用的索引文档的数量 不包含已经删除的 * reader.numDeletedDocs()删除的索引文档的数量 */ LOGGER.info("总记录:" + docs.totalHits + " 命中文档数:" + docs.scoreDocs.length + " 最大的文档数maxDoc:" + reader.maxDoc() + " 删除文件数numDeletedDocs:" + reader.numDeletedDocs() + " numDocs" + reader.numDocs()); for (ScoreDoc doc : docs.scoreDocs) { Document document = reader.document(doc.doc); LOGGER.info("id:" + document.get("id") + " name:" + document.get("name") + " path:" + document.get("path")); } reader.close(); } /** * 更新索引 * @throws IOException */ public void update() throws IOException { writer = new IndexWriter(directory, config); Document document = new Document(); document.add(new StringField("name", "新文件", Store.YES)); document.add(new IntField("id", 12, Store.YES)); document.add(new StringField("path", "D:\\lucene\\example\\新文件.txt", Store.YES)); writer.updateDocument(new Term("id", "2"),document); writer.commit(); writer.close(); } /** * 删除索引 
删除的索引会保存到一个新的文件中(以del为结尾的文件 相当于删除到回收站) * @throws IOException */ public void delete() throws IOException { writer = new IndexWriter(directory, config); writer.deleteDocuments(new Term("name", "11.txt")); writer.close(); } /** * 删除所有的索引 删除的索引会保存到一个新的文件中(以del为结尾的文件 相当于删除到回收站) * @throws IOException */ public void deleteAll() throws IOException { writer = new IndexWriter(directory, config); writer.deleteAll(); writer.close(); } /** * 删除已经删除的索引 对应上一个删除方法 删除回收站的文件 * @throws IOException */ public void forceMergeDeletes() throws IOException { writer = new IndexWriter(directory, config); writer.forceMergeDeletes();// 清空回收站 writer.close(); } /** * 显示所有的索引 * @throws IOException */ public void showIndex() throws IOException { reader = DirectoryReader.open(directory); Fields fields = MultiFields.getFields(reader); //获取directory中所有的field for (String field : fields) { LOGGER.info(field); } //显示 field 中 context的所有的分词 Terms terms = fields.terms("context"); TermsEnum termsEnum = terms.iterator(null); BytesRef term = null; while ((term=termsEnum.next()) !=null) { System.out.print(term.utf8ToString()+"\t");//分词的内容 System.out.print(termsEnum.docFreq()+"\t");//出现该分词的有文档的数量 System.out.print(termsEnum.totalTermFreq()+"\t");//分词的总数 DocsAndPositionsEnum docsAndPositionsEnum = termsEnum.docsAndPositions(null, null); //如果要查询的字段 没有被分词 ,docsAndPositionsEnum就会为空 继续循环 if(docsAndPositionsEnum==null){ continue; } int docId ; while ((docId = docsAndPositionsEnum.nextDoc())!= DocIdSetIterator.NO_MORE_DOCS) { Document document = reader.document(docId);//获取document对象 System.out.print(docId+"\t");//分词的总数 System.out.print(document.get("name")+"\t");//可以获取document中field的值 int freq = docsAndPositionsEnum.freq();//该document中 该分词出现的次数 for (int i = 0; i < freq; i++) { System.out.print(docsAndPositionsEnum.nextPosition()+":"); //分词的位置 System.out.print("["+docsAndPositionsEnum.startOffset()+"");//分词起始偏移量的位置 System.out.print(docsAndPositionsEnum.endOffset()+"],");//分词结束偏移量的位置 
System.out.print(docsAndPositionsEnum.getPayload()+"\t"); } } System.out.println(); } reader.close(); } }
浙公网安备 33010602011771号