第六步:Lucene查询索引(优化一)
package cn.harmel.lucene; import java.io.IOException; import java.nio.file.Paths; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.queryparser.classic.ParseException; import org.apache.lucene.queryparser.classic.QueryParser; import org.apache.lucene.queryparser.classic.QueryParser.Operator; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Query; import org.apache.lucene.search.ScoreDoc; import org.apache.lucene.search.SearcherFactory; import org.apache.lucene.search.SearcherManager; import org.apache.lucene.search.Sort; import org.apache.lucene.search.SortField; import org.apache.lucene.search.SortField.Type; import org.apache.lucene.search.TopDocs; import org.apache.lucene.store.FSDirectory; public class SearchIndex { public static void main(String[] args) throws IOException, ParseException { /*--------------------------------------------------------------------------------- * String queryString="title:\"美国\"";//包含“美国” * String queryString="NOT title:\"美国\" AND title:\"国\"";//不包含 “美国” 但是包含 “国” * String queryString="title:\"中\" AND title:\"国人\"";//即包含 “中” 又包含 ”国人“ * String queryString="title:\"张三\" OR title:\"李四\"";//包含 “张三” 或 ”李四“ * String queryString="id:[1 TO 5]";//查询 1<=id<=5 * String queryString="id:{1 TO 5}";//查询 1<id<5 * String queryString="title:\"中国\" AND NOT title:\"张三\" AND id:[1 TO 5]"; * String queryString="title:(中国 张三)";即包含 “中国” 又包含 ”张三“ ---------------------------------------------------------------------------------*/ String queryString="title:\"中国\""; Analyzer analyzer = new StandardAnalyzer(); //StandardAnalyzer:汉字一个一个字拆分索引 IndexSearcher is = getIndexSearcher(); QueryParser parser=new QueryParser("", analyzer); parser.setDefaultOperator(Operator.AND);//默认状态下,空格被认为是OR的关系,设置为空格为AND Query query = parser.parse(queryString); //排序 Sort sort = new Sort(new SortField("dt", Type.LONG, true)); TopDocs topDocs = is.search(query, 20,sort); System.out.println("总共匹配多少个:" + topDocs.totalHits); //获取查询结果集 ScoreDoc[] hits = topDocs.scoreDocs; //遍历结果集 for (ScoreDoc scoreDoc : hits) { Document document = is.doc(scoreDoc.doc); System.out.println(document.get("id")+"-"+document.get("name")+"-"+document.get("dt")+"-"+document.get("title")); } closeIndexSearcher(is); } /* * 解决:实现实时搜索功能 * 解决:在读取索引文件内容时,索引文件的打开操作IndexSearcher是个非常耗时耗资源的操作 * */ private static byte[] synchronized_r = new byte[0]; private static SearcherManager searcherManager = null; private static IndexSearcher getIndexSearcher() throws IOException { IndexSearcher indexSearcher = null; //保证在同一时刻最多只有一个线程执行该段代码 synchronized (synchronized_r) { if(searcherManager == null) { searcherManager = new SearcherManager(FSDirectory.open(Paths.get("E:\\LuceneIndex")), new SearcherFactory()); } /* * 这个方法同DirectoryReader.openIfChanged(dirReader)效果一样,其实底层还是调用的该方法实现的 * DirectoryReader.openIfChanged(dirReader)可以判断当前Reader对象的索引有没有被修改过 * 如果索引文件被更新过则重新加载该索引目录 * 但是这个时候的重新加载则比单纯的open(indexPath)要高效很多 * 它只是重新加载被更新过的文档,而单纯的open则是加载全部的文档 * */ searcherManager.maybeRefresh(); //借用一个IndexSearcher对象的引用,记住该对象用完之后要归还的,有借有还再借不难 indexSearcher = searcherManager.acquire(); } return indexSearcher; } private static void closeIndexSearcher(IndexSearcher indexSearcher) throws IOException { if(indexSearcher != null) { searcherManager.release(indexSearcher);//归还从SearcherManager处借来的IndexSearcher对象 } indexSearcher = null; } }