Lucene HelloWorld实现

package com.java1234.lucene;

import java.io.File;
import java.io.FileReader;
import java.nio.file.Paths;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

/**
 * Builds a Lucene index from the files found directly inside a data directory.
 *
 * <p>Each regular file becomes one document with three fields: {@code contents}
 * (tokenized from the file text, not stored), {@code fileName} and
 * {@code fullPath} (both stored). The instance owns an {@link IndexWriter} and
 * the underlying {@link Directory}; call {@link #close()} (or use
 * try-with-resources) to release them.
 */
public class Indexer implements AutoCloseable {

    /** Index storage location; kept so it can be closed together with the writer. */
    private final Directory directory;

    /** Writer used to add documents to the index. */
    private final IndexWriter writer;

    /**
     * Opens (or creates) the index stored at {@code indexDir}.
     *
     * @param indexDir file-system path of the directory that holds the index
     * @throws Exception if the directory or the index writer cannot be opened
     */
    public Indexer(String indexDir) throws Exception {
        Directory dir = FSDirectory.open(Paths.get(indexDir));
        IndexWriter indexWriter;
        try {
            // StandardAnalyzer: general-purpose tokenizer; the original author notes
            // it is only really suitable for English text.
            Analyzer analyzer = new StandardAnalyzer();
            IndexWriterConfig conf = new IndexWriterConfig(analyzer);
            indexWriter = new IndexWriter(dir, conf);
        } catch (Exception e) {
            // Do not leak the directory handle when the writer cannot be created.
            try {
                dir.close();
            } catch (Exception closeFailure) {
                e.addSuppressed(closeFailure);
            }
            throw e;
        }
        this.directory = dir;
        this.writer = indexWriter;
    }

    /**
     * Closes the index writer and then the underlying directory.
     *
     * @throws Exception if either resource fails to close
     */
    @Override
    public void close() throws Exception {
        try {
            writer.close();
        } finally {
            directory.close();
        }
    }

    /**
     * Indexes every regular file located directly inside {@code dataDir}.
     * Sub-directories and other non-regular entries are skipped.
     *
     * @param dataDir path of the directory whose files should be indexed
     * @return the value of {@code writer.numDocs()} after indexing; NOTE(review):
     *         this is the writer's total document count, so it presumably also
     *         includes documents already present in an existing index
     * @throws IllegalArgumentException if {@code dataDir} cannot be listed
     *         (it does not exist, is not a directory, or an I/O error occurred)
     * @throws Exception if reading a file or writing to the index fails
     */
    public int index(String dataDir) throws Exception {
        File[] files = new File(dataDir).listFiles();
        if (files == null) {
            // listFiles() returns null instead of throwing when the path is unusable.
            throw new IllegalArgumentException("Cannot list files in data directory: " + dataDir);
        }
        for (File f : files) {
            // Only regular files can be opened with FileReader; a directory would throw.
            if (f.isFile()) {
                indexFile(f);
            }
        }
        return writer.numDocs();
    }

    /**
     * Adds a single file to the index.
     *
     * @param f the file to index
     * @throws Exception if the file cannot be read or the document cannot be added
     */
    private void indexFile(File f) throws Exception {
        System.out.println("索引文件："+f.getCanonicalPath());
        // The reader is opened here so it is always released, even when
        // addDocument fails part-way through. It uses the platform default charset.
        try (FileReader contents = new FileReader(f)) {
            writer.addDocument(getDocument(f, contents));
        }
    }

    /**
     * Builds the Lucene document for a file. A document is comparable to a
     * database row, with each field holding one value taken from the file.
     *
     * @param f        the file being indexed (supplies name and canonical path)
     * @param contents open reader over the file text; the caller closes it
     * @return a document with {@code contents}, {@code fileName} and {@code fullPath} fields
     * @throws Exception if the canonical path cannot be resolved
     */
    private Document getDocument(File f, FileReader contents) throws Exception {
        Document doc = new Document();
        doc.add(new TextField("contents", contents));
        doc.add(new TextField("fileName", f.getName(), Field.Store.YES));
        doc.add(new TextField("fullPath", f.getCanonicalPath(), Field.Store.YES));
        return doc;
    }

    /**
     * Indexes a data directory and prints how many documents the index holds
     * and how long the run took.
     *
     * @param args optional overrides: {@code args[0]} = index directory,
     *             {@code args[1]} = data directory; defaults are used when absent
     */
    public static void main(String[] args) {
        String indexDir = args.length > 0 ? args[0] : "E:\\lucene";       // where the index is stored
        String dataDir = args.length > 1 ? args[1] : "E:\\lucene\\data";  // files to be indexed
        int numIndexed = 0;
        long start = System.currentTimeMillis();
        // try-with-resources closes the indexer only if it was actually created,
        // so a constructor failure is no longer masked by a NullPointerException.
        try (Indexer indexer = new Indexer(indexDir)) {
            numIndexed = indexer.index(dataDir);
        } catch (Exception e) {
            e.printStackTrace();
        }
        long end = System.currentTimeMillis();
        System.out.println("索引："+numIndexed+"个文件   花费了"+(end-start)+"毫秒");
    }

}

package com.java1234.lucene;

import java.nio.file.Paths;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

/**
 * Runs a query against the {@code contents} field of a Lucene index and prints
 * the stored {@code fullPath} of each hit.
 */
public class Search {

    /**
     * Searches the index at {@code indexDir} for {@code q} and prints the
     * elapsed search time, the total hit count, and the {@code fullPath} field
     * of up to ten top-scoring documents.
     *
     * @param indexDir file-system path of the directory that holds the index
     * @param q        query string, parsed with the classic {@link QueryParser}
     * @throws Exception if the index cannot be opened, the query cannot be
     *         parsed, or the search fails
     */
    public static void search(String indexDir, String q) throws Exception {
        // Both resources are closed on every exit path, including when parsing
        // or searching throws.
        try (Directory dir = FSDirectory.open(Paths.get(indexDir));
             IndexReader reader = DirectoryReader.open(dir)) {
            IndexSearcher searcher = new IndexSearcher(reader);
            Analyzer analyzer = new StandardAnalyzer();
            QueryParser parser = new QueryParser("contents", analyzer);
            Query query = parser.parse(q);

            long start = System.currentTimeMillis();
            TopDocs docs = searcher.search(query, 10); // keep at most the 10 best hits
            long end = System.currentTimeMillis();

            System.out.println("匹配 "+q+" ，总共花费"+(end-start)+"毫秒"+"查询到"+docs.totalHits+"个记录");
            for (ScoreDoc scoreDoc : docs.scoreDocs) {
                Document document = searcher.doc(scoreDoc.doc);
                System.out.println(document.get("fullPath"));
            }
        }
    }

    /**
     * Runs a sample query and prints the results.
     *
     * @param args optional overrides: {@code args[0]} = index directory,
     *             {@code args[1]} = query string; defaults are used when absent
     */
    public static void main(String[] args) {
        // Default matches the index directory that Indexer.main writes to.
        String indexDir = args.length > 0 ? args[0] : "E:\\lucene";
        String q = args.length > 1 ? args[1] : "Zygmunt Saloni";
        try {
            search(indexDir, q);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}

posted @ 2016-03-18 18:33 DMC_HZP 阅读(224) 评论(1) 收藏举报

刷新页面返回顶部

DMC_HZP

Lucene HelloWorld实现

公告