package com.java1234.lucene;
import java.io.File;
import java.io.FileReader;
import java.nio.file.Paths;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
/**
 * Builds a Lucene index from the files found directly inside a data directory.
 *
 * <p>Each regular file becomes one document with three fields: {@code contents}
 * (tokenized from the file's text, not stored), {@code fileName} and
 * {@code fullPath} (both stored so they can be shown in search results).
 *
 * <p>The class implements {@link AutoCloseable} so it can be used in a
 * try-with-resources statement; calling {@link #close()} directly still works.
 */
public class Indexer implements AutoCloseable {

    /** Directory holding the index; kept so it can be released together with the writer. */
    private final Directory directory;

    /** Writer used to add documents to the index. */
    private final IndexWriter writer;

    /**
     * Opens (or creates) the index stored at the given location.
     *
     * @param indexDir path of the directory in which the index is stored
     * @throws Exception if the directory or the index writer cannot be opened
     */
    public Indexer(String indexDir) throws Exception {
        directory = FSDirectory.open(Paths.get(indexDir)); // locate the index storage
        try {
            // Standard analyzer; the original author notes it is only suitable for English text.
            Analyzer analyzer = new StandardAnalyzer();
            // The writer configuration requires an analyzer.
            IndexWriterConfig conf = new IndexWriterConfig(analyzer);
            writer = new IndexWriter(directory, conf);
        } catch (Exception e) {
            // Do not leave the directory open when the writer could not be created.
            try {
                directory.close();
            } catch (Exception closeFailure) {
                e.addSuppressed(closeFailure);
            }
            throw e;
        }
    }

    /**
     * Closes the index writer and then the underlying directory.
     *
     * @throws Exception if closing the writer or the directory fails
     */
    @Override
    public void close() throws Exception {
        try {
            writer.close();
        } finally {
            directory.close();
        }
    }

    /**
     * Indexes every regular file located directly inside {@code dataDir}.
     * Sub-directories and other non-regular entries are skipped because their
     * contents cannot be read as a single text stream.
     *
     * @param dataDir directory containing the files to index
     * @return the value of {@code writer.numDocs()} after indexing, i.e. the
     *         document count reported by the writer (NOTE(review): when the
     *         index already existed this presumably includes documents added
     *         by earlier runs - confirm against the writer's open mode)
     * @throws IllegalArgumentException if {@code dataDir} cannot be listed
     *         (it does not exist, is not a directory, or an I/O error occurred)
     * @throws Exception if reading a file or writing to the index fails
     */
    public int index(String dataDir) throws Exception {
        File[] files = new File(dataDir).listFiles();
        if (files == null) {
            // listFiles() returns null instead of throwing when the path is unusable.
            throw new IllegalArgumentException("Cannot list files in directory: " + dataDir);
        }
        for (File f : files) {
            if (f.isFile()) {
                indexFile(f);
            }
        }
        return writer.numDocs();
    }

    /**
     * Adds a single file to the index.
     *
     * @param f the file to index
     * @throws Exception if the file cannot be read or the document cannot be added
     */
    private void indexFile(File f) throws Exception {
        System.out.println("索引文件:" + f.getCanonicalPath());
        // The reader is consumed while addDocument runs; owning it here guarantees the
        // file handle is released even if addDocument fails before reading it.
        // FileReader decodes with the JVM's default charset.
        try (FileReader contents = new FileReader(f)) {
            writer.addDocument(getDocument(f, contents));
        }
    }

    /**
     * Builds the document for a file. A document is comparable to a database
     * row: each field carries one value taken from the file being indexed.
     *
     * @param f        the file being indexed
     * @param contents open reader over the file's text; it must stay open until
     *                 the returned document has been added to the index
     * @return a document with the {@code contents}, {@code fileName} and
     *         {@code fullPath} fields
     * @throws Exception if the canonical path of the file cannot be resolved
     */
    private Document getDocument(File f, FileReader contents) throws Exception {
        Document doc = new Document();
        doc.add(new TextField("contents", contents));                              // file text
        doc.add(new TextField("fileName", f.getName(), Field.Store.YES));          // file name
        doc.add(new TextField("fullPath", f.getCanonicalPath(), Field.Store.YES)); // full path
        return doc;
    }

    /**
     * Indexes a data directory and prints the document count and elapsed time.
     *
     * @param args optional: {@code args[0]} is the index directory and
     *             {@code args[1]} the data directory; the original hard-coded
     *             paths are used for any argument that is not supplied
     */
    public static void main(String[] args) {
        String indexDir = args.length > 0 ? args[0] : "E:\\lucene";       // where the index is stored
        String dataDir = args.length > 1 ? args[1] : "E:\\lucene\\data"; // files to be indexed
        int numIndexed = 0;
        long start = System.currentTimeMillis();
        // try-with-resources only closes the indexer if it was actually constructed,
        // so a constructor failure is reported as itself instead of as a
        // NullPointerException from the cleanup code.
        try (Indexer indexer = new Indexer(indexDir)) {
            numIndexed = indexer.index(dataDir);
        } catch (Exception e) {
            e.printStackTrace();
        }
        long end = System.currentTimeMillis();
        System.out.println("索引:" + numIndexed + "个文件 花费了" + (end - start) + "毫秒");
    }
}
package com.java1234.lucene;
import java.nio.file.Paths;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
/**
 * Command-line helper that runs a query against the {@code contents} field of
 * a Lucene index and prints the {@code fullPath} of each matching document.
 */
public class Search {

    /**
     * Searches the index for {@code q} and prints the hit count, the time the
     * search call took, and the stored {@code fullPath} of up to ten top hits.
     *
     * @param indexDir path of the directory in which the index is stored
     * @param q        query text, parsed with the classic query parser against
     *                 the {@code contents} field
     * @throws Exception if the index cannot be opened, the query cannot be
     *                   parsed, or the search fails
     */
    public static void search(String indexDir, String q) throws Exception {
        // try-with-resources releases the reader and the directory even when
        // parsing or searching throws; previously the reader was only closed
        // on the success path and the directory was never closed.
        try (Directory dir = FSDirectory.open(Paths.get(indexDir));
             IndexReader reader = DirectoryReader.open(dir)) {
            IndexSearcher searcher = new IndexSearcher(reader);
            Analyzer analyzer = new StandardAnalyzer();                  // analyzer used to tokenize the query
            QueryParser parser = new QueryParser("contents", analyzer);
            Query query = parser.parse(q);

            long start = System.currentTimeMillis();
            TopDocs docs = searcher.search(query, 10);                   // keep at most the ten best hits
            long end = System.currentTimeMillis();
            System.out.println("匹配 " + q + " ,总共花费" + (end - start) + "毫秒" + "查询到" + docs.totalHits + "个记录");

            for (ScoreDoc scoreDoc : docs.scoreDocs) {
                Document document = searcher.doc(scoreDoc.doc);
                System.out.println(document.get("fullPath"));
            }
        }
    }

    /**
     * Runs a sample query and prints any failure's stack trace.
     *
     * @param args optional: {@code args[0]} is the index directory and
     *             {@code args[1]} the query text; the original hard-coded
     *             values are used for any argument that is not supplied
     */
    public static void main(String[] args) {
        // NOTE(review): Indexer.main writes its index to E:\lucene while this default
        // points at D:\lucene - confirm which location is intended.
        String indexDir = args.length > 0 ? args[0] : "D:\\lucene";
        String q = args.length > 1 ? args[1] : "Zygmunt Saloni";
        try {
            search(indexDir, q);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}