Lucene搜索笔记
Lucene的常用检索类:
IndexSearcher:检索操作的核心组件,用于对 IndexWriter 创建的索引执行只读的检索操作,工作模式为接收 Query 对象并返回 TopDocs 对象(其中的 scoreDocs 即 ScoreDoc 数组)。
Term:检索的基本单元,表示检索的字段名称和检索对象的值,如 Term("title", "lucene") 即表示在 title 字段中搜寻关键词 lucene。
Query:表示查询的抽象类,由相应的 Term 来标识。
TermQuery :最基本的查询类型,用于匹配含有指定值字段的文档。
TopDocs:保存查询结果的类。
ScoreDoc(Hits):用来装载搜索结果文档队列指针的数组容器。
package com.fxr.demo3;
import java.io.IOException;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocCollector;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
/**
 * Example: runs a single-term query against the on-disk index and prints the hits.
 */
public class TestSearcher {

    /**
     * Searches the "address" field for the exact term "shanghai" and prints the
     * score, id, name and address of at most 10 matching documents.
     *
     * @param args unused
     * @throws IOException if the index cannot be opened or read
     */
    public static void main(String[] args) throws IOException {
        String indexDir = "f:/lucenefxrindex";
        Directory directory = FSDirectory.getDirectory(indexDir);
        try {
            IndexSearcher indexSearcher = new IndexSearcher(directory);
            try {
                // A TermQuery is matched against the indexed terms as-is,
                // so no Analyzer is involved on the search side.
                TermQuery termQuery = new TermQuery(new Term("address", "shanghai"));
                TopDocs topDocs = indexSearcher.search(termQuery, 10);
                for (ScoreDoc hit : topDocs.scoreDocs) {
                    Document document = indexSearcher.doc(hit.doc);
                    System.out.println(hit.score);
                    System.out.println(document.get("id"));
                    System.out.println(document.get("name"));
                    System.out.println(document.get("address"));
                }
            } finally {
                // Release the searcher even when the search or a document load fails.
                indexSearcher.close();
            }
        } finally {
            directory.close();
        }
    }
}
通配符的查询:
在 Lucene 中也可以使用通配符查询:"*" 匹配任意长度(包括零个)的字符序列,"?" 匹配任意单个字符。
package com.fxr.demo3;
import java.io.IOException;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocCollector;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.WildcardQuery;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
/**
 * Example: runs a wildcard query against the on-disk index and prints the hits.
 */
public class TestWildcardSearcher {

    /**
     * Searches the "address" field for terms matching the pattern "z*" and prints
     * the score, id, name and address of at most 10 matching documents.
     *
     * @param args unused
     * @throws IOException if the index cannot be opened or read
     */
    public static void main(String[] args) throws IOException {
        String indexDir = "f:/lucenefxrindex";
        Directory directory = FSDirectory.getDirectory(indexDir);
        try {
            IndexSearcher indexSearcher = new IndexSearcher(directory);
            try {
                // The pattern is carried in the Term text; "*" stands for any run of characters.
                WildcardQuery wildcardQuery = new WildcardQuery(new Term("address", "z*"));
                TopDocs topDocs = indexSearcher.search(wildcardQuery, 10);
                for (ScoreDoc hit : topDocs.scoreDocs) {
                    Document document = indexSearcher.doc(hit.doc);
                    System.out.println(hit.score);
                    System.out.println(document.get("id"));
                    System.out.println(document.get("name"));
                    System.out.println(document.get("address"));
                }
            } finally {
                // Release the searcher even when the search or a document load fails.
                indexSearcher.close();
            }
        } finally {
            directory.close();
        }
    }
}
如何把多个查询结合起来使用?实际需求中经常需要这样做,这时可以使用 BooleanQuery:通过 add 方法加入多个 Query,并用 BooleanClause.Occur 参数(如 MUST、SHOULD)指定每个子查询的组合方式。
package com.fxr.demo3;
import java.io.IOException;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocCollector;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.WildcardQuery;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
/**
 * Example: combines a wildcard query and a term query with a BooleanQuery
 * and prints the hits of the combined query.
 */
public class TestWildcardSearcher {

    /**
     * Builds a BooleanQuery from two clauses and prints the score, id, name and
     * address of at most 10 matching documents:
     * <ul>
     *   <li>MUST: the "address" field has a term matching "z*";</li>
     *   <li>SHOULD: the "name" field contains the term "zhangsan".</li>
     * </ul>
     *
     * @param args unused
     * @throws IOException if the index cannot be opened or read
     */
    public static void main(String[] args) throws IOException {
        String indexDir = "f:/lucenefxrindex";
        Directory directory = FSDirectory.getDirectory(indexDir);
        try {
            IndexSearcher indexSearcher = new IndexSearcher(directory);
            try {
                WildcardQuery wildcardQuery = new WildcardQuery(new Term("address", "z*"));
                TermQuery termQuery = new TermQuery(new Term("name", "zhangsan"));

                BooleanQuery booleanQuery = new BooleanQuery();
                // Required clause: every hit must match the wildcard.
                booleanQuery.add(wildcardQuery, BooleanClause.Occur.MUST);
                // Optional clause: alongside a MUST clause it does not filter,
                // it only raises the score of documents that also match it.
                booleanQuery.add(termQuery, BooleanClause.Occur.SHOULD);

                // Run the combined query (the original searched wildcardQuery,
                // which silently ignored the BooleanQuery built above).
                TopDocs topDocs = indexSearcher.search(booleanQuery, 10);
                for (ScoreDoc hit : topDocs.scoreDocs) {
                    Document document = indexSearcher.doc(hit.doc);
                    System.out.println(hit.score);
                    System.out.println(document.get("id"));
                    System.out.println(document.get("name"));
                    System.out.println(document.get("address"));
                }
            } finally {
                // Release the searcher even when the search or a document load fails.
                indexSearcher.close();
            }
        } finally {
            directory.close();
        }
    }
}
指定范围的查询 RangeQuery,可以查询某个区间内的数据:
package com.firstproject.testsearch;
import java.io.IOException;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.RangeQuery;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
/**
 * Example: runs a range query over the "birthday" field and prints the hits.
 */
public class TestRangeQuery {

    /**
     * Searches for documents whose "birthday" term lies between "19820720" and
     * "19830130" (bounds included) and prints id, name, address and birthday of
     * at most 2 hits, one document per line.
     *
     * @param args unused
     * @throws IOException if the index cannot be opened or read
     */
    public static void main(String[] args) throws IOException {
        String indexDir = "f:/lucenefxrindex";
        Directory dir = FSDirectory.getDirectory(indexDir);
        try {
            IndexSearcher searcher = new IndexSearcher(dir);
            try {
                Term beginTerm = new Term("birthday", "19820720");
                Term endTerm = new Term("birthday", "19830130");
                // Third argument 'true' makes both end points inclusive.
                RangeQuery query = new RangeQuery(beginTerm, endTerm, true);
                TopDocs topDocs = searcher.search(query, 2);
                for (ScoreDoc hit : topDocs.scoreDocs) {
                    Document doc = searcher.doc(hit.doc);
                    System.out.print(doc.get("id") + " ");
                    System.out.print(doc.get("name") + " ");
                    System.out.print(doc.get("address") + " ");
                    System.out.println(doc.get("birthday") + " ");
                }
            } finally {
                // Release the searcher even when the search or a document load fails.
                searcher.close();
            }
        } finally {
            dir.close();
        }
    }
}
读取文件中的数据
package com.fxr.demo3;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
/**
 * Example: reads a text file, splits it into records and indexes each record
 * as one document with a single "contents" field.
 */
public class TestFileDataQuery {

    /**
     * Reads f:/data/data.txt, splits its text on the separator
     * "Database: Compendex", prints the number of records, and adds every
     * trimmed record to the index as a stored, analyzed "contents" field.
     * Prints "ok!" once the index has been optimized and closed.
     *
     * @param args unused
     * @throws IOException if the data file or the index cannot be read or written
     */
    public static void main(String[] args) throws IOException {
        String text = readAll(new File("f:/data/data.txt"));
        String[] strs = text.split("Database: Compendex");
        System.out.println(strs.length);
        for (int i = 0; i < strs.length; i++) {
            strs[i] = strs[i].trim();
        }

        Analyzer analyzer = new StandardAnalyzer();
        String indexDir = "f:/lucenefxrindex";
        Directory directory = FSDirectory.getDirectory(indexDir);
        try {
            IndexWriter indexWriter =
                    new IndexWriter(directory, analyzer, IndexWriter.MaxFieldLength.UNLIMITED);
            try {
                for (String record : strs) {
                    Document document = new Document();
                    document.add(new Field("contents", record, Field.Store.YES, Field.Index.ANALYZED));
                    indexWriter.addDocument(document);
                }
                indexWriter.optimize();
            } finally {
                // Close the writer even when adding a document fails.
                indexWriter.close();
            }
        } finally {
            directory.close();
        }
        System.out.println("ok!");
    }

    /**
     * Reads the whole file into a string using the platform default charset.
     * Loops until end of stream because a single read() call may return fewer
     * characters than the buffer holds, and the file may be larger than any
     * fixed-size buffer.
     *
     * @param file the file to read
     * @return the complete text of the file (empty if the file is empty)
     * @throws IOException if the file cannot be opened or read
     */
    private static String readAll(File file) throws IOException {
        FileReader reader = new FileReader(file);
        try {
            StringBuilder text = new StringBuilder();
            char[] buffer = new char[8192];
            int count;
            while ((count = reader.read(buffer)) != -1) {
                // Append only the characters actually read, never buffer padding.
                text.append(buffer, 0, count);
            }
            return text.toString();
        } finally {
            reader.close();
        }
    }
}
package com.fxr.demo3;
import java.io.IOException;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.WildcardQuery;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
/**
 * Example: runs a wildcard query over the "contents" field of the index and
 * prints the matching records.
 */
public class TestFileQuery {

    /**
     * Searches the "contents" field for terms matching "*dsorption*" and prints
     * the score and stored contents of at most 126 matching documents.
     *
     * @param args unused
     * @throws IOException if the index cannot be opened or read
     */
    public static void main(String[] args) throws IOException {
        String indexDir = "f:/lucenefxrindex";
        Directory directory = FSDirectory.getDirectory(indexDir);
        try {
            IndexSearcher indexSearcher = new IndexSearcher(directory);
            try {
                // Wildcards on both sides: matches any term containing "dsorption".
                WildcardQuery wildcardQuery = new WildcardQuery(new Term("contents", "*dsorption*"));
                TopDocs topDocs = indexSearcher.search(wildcardQuery, 126);
                for (ScoreDoc hit : topDocs.scoreDocs) {
                    Document document = indexSearcher.doc(hit.doc);
                    System.out.println(hit.score);
                    System.out.println(document.get("contents"));
                }
            } finally {
                // Release the searcher even when the search or a document load fails.
                indexSearcher.close();
            }
        } finally {
            directory.close();
        }
    }
}
浙公网安备 33010602011771号