[web] lucene 搜索入门

来一段关于lucene的维基百科介绍：

Lucene是一套用于全文检索和搜寻的开放源码程式库，由Apache软件基金会支持和提供。Lucene提供了一个简单却强大的应用程序界面，能够做全文索引和搜寻；在Java开发环境里，Lucene是一个成熟的免费开放源代码工具。就其本身而论，Lucene是现在、也是近几年来最受欢迎的免费Java资讯检索程式库。

这个 demo 基于本地文件进行搜索；搞清楚原理之后，其他扩展就不太困难了。此处 demo 使用 Apache Lucene 5.3.0，是撰写本文时刚刚下载的最新版本。

1.由本地文件生成索引文件：

package com.wa.xwolf.eap.search;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.*;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

import java.io.*;
import java.nio.file.Paths;

/**
 * Demo based on Lucene 5.3.0 that builds a full-text index from local files.
 *
 * <p>All state lives in static fields, so the most recently constructed
 * instance decides which index directory the static methods write to.
 *
 * Created by Administrator on 2015/9/10.
 */
public class IndexWriter {

    private static org.apache.lucene.index.IndexWriter indexWriter;
    private static Directory directory;
    private static IndexWriterConfig indexWriterConfig;
    private static Analyzer analyzer;

    /**
     * Opens the index stored under the given directory for writing.
     *
     * @param souceFile path of the directory in which the index files are stored
     * @throws IllegalStateException if the index directory cannot be opened;
     *         the original {@link IOException} is attached as the cause
     */
    public IndexWriter(String souceFile) {
        try {
            // Directory that holds the index files.
            directory = FSDirectory.open(Paths.get(souceFile));
            analyzer = new StandardAnalyzer();
            indexWriterConfig = new IndexWriterConfig(analyzer);
            // Open the writer in CREATE mode.
            indexWriterConfig.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
            indexWriter = new org.apache.lucene.index.IndexWriter(directory, indexWriterConfig);
        } catch (IOException e) {
            // Fail fast: continuing with a null writer would only surface later
            // as an unrelated NullPointerException.
            throw new IllegalStateException("Cannot open index directory: " + souceFile, e);
        }
    }

    /**
     * Converts a file into a {@link Document} with three fields:
     * {@code file_path} (stored), {@code size} (stored) and {@code text}
     * (the file content, supplied through a reader).
     *
     * <p>The reader is intentionally left open: the {@code text} field reads
     * from it when the document is added to the index.
     *
     * @param file the file to index
     * @return the document, or {@code null} if the file cannot be opened
     */
    public static Document getDocuments(File file) {
        try {
            // Decode with an explicit charset so the indexed text does not depend
            // on the platform default. NOTE(review): assumes the source files are
            // UTF-8 encoded - confirm for your data.
            Reader reader = new BufferedReader(new InputStreamReader(
                    new FileInputStream(file), java.nio.charset.StandardCharsets.UTF_8));

            Document document = new Document();
            // Field arguments: name, value, whether the value is stored.
            document.add(new StringField("file_path", file.getAbsolutePath(), Field.Store.YES));
            document.add(new LongField("size", file.length(), Field.Store.YES));
            document.add(new TextField("text", reader));
            return document;
        } catch (IOException e) {
            e.printStackTrace();
            return null;
        }
    }

    /**
     * Indexes a single file, or every regular file directly inside a directory.
     * Sub-directories and files that cannot be opened are skipped.
     *
     * @param srcFile path of the file or directory to index
     * @throws IOException if the directory cannot be listed or the index cannot be written
     * @throws IllegalStateException if no {@code IndexWriter} has been constructed yet
     */
    public static void writeIndex(String srcFile) throws IOException {
        if (indexWriter == null) {
            throw new IllegalStateException("Index writer is not open; construct an IndexWriter first");
        }
        File dirFile = new File(srcFile);
        if (dirFile.isDirectory()) {
            File[] files = dirFile.listFiles();
            if (files == null) {
                // listFiles() returns null when the directory cannot be read.
                throw new IOException("Cannot list directory: " + dirFile);
            }
            for (File file : files) {
                if (!file.isFile()) {
                    // Sub-directories cannot be opened as a stream; skip them.
                    continue;
                }
                System.out.println(dirFile + "目录正在创建索引 : " + file + "");
                addToIndex(file);
            }
        } else {
            System.out.println("文件正在创建索引 : " + dirFile + "");
            addToIndex(dirFile);
        }
    }

    /** Adds one file to the index, skipping it when no document could be built. */
    private static void addToIndex(File file) throws IOException {
        Document doc = getDocuments(file);
        if (doc == null) {
            System.err.println("跳过无法读取的文件 : " + file);
            return;
        }
        indexWriter.addDocument(doc);
    }

    /**
     * Builds the index for {@code Constants.LUCENE_FILE_STORE} inside
     * {@code Constants.LUCENE_INDEX_STORE}.
     */
    public static void main(String[] args) throws Exception {
        // The constructor initialises the static writer used by writeIndex().
        new IndexWriter(Constants.LUCENE_INDEX_STORE);
        try {
            writeIndex(Constants.LUCENE_FILE_STORE);
        } finally {
            try {
                // The writer must be closed, otherwise the segments_* file is not created.
                indexWriter.close();
            } finally {
                directory.close();
            }
        }
    }

}

2. 根据生成的索引进行搜索

package com.wa.xwolf.eap.search;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.queryparser.flexible.standard.QueryParserUtil;
import org.apache.lucene.search.*;
import org.apache.lucene.store.FSDirectory;

import java.io.IOException;
import java.nio.file.Paths;

/**
 * Demo that searches the Lucene index stored in a local directory.
 *
 * <p>All state lives in static fields, so the most recently constructed
 * instance decides which index the static methods search.
 *
 * Created by Administrator on 2015/9/10.
 */
public class Search {
    private static IndexSearcher indexSearcher;

    private static Query query;

    private static IndexReader indexReader;

    /**
     * Opens the index stored under the given directory for searching.
     *
     * @param path path of the directory that contains the index files
     * @throws IllegalStateException if the index cannot be opened; the original
     *         {@link IOException} is attached as the cause
     */
    public Search(String path) {
        try {
            indexReader = DirectoryReader.open(FSDirectory.open(Paths.get(path)));
            indexSearcher = new IndexSearcher(indexReader);
        } catch (IOException e) {
            // Fail fast: continuing with a null searcher would only surface later
            // as an unrelated NullPointerException.
            throw new IllegalStateException("Cannot open index directory: " + path, e);
        }
    }

    /**
     * Runs a query built from the given keyword/field pairs and returns up to
     * 100 hits.
     *
     * @param keys   the keywords to search for
     * @param fields the field each keyword is searched in, passed together with
     *               {@code keys} to {@code QueryParserUtil.parse}
     * @return the hits, or {@code null} if the query could not be built or executed
     */
    private static TopDocs getResult(String[] keys, String[] fields) {
        Analyzer analyzer = new StandardAnalyzer();
        try {
            // Utility method that takes the keyword and field arrays directly.
            // For a single field, new QueryParser(field, analyzer).parse(key)
            // is the basic alternative.
            query = QueryParserUtil.parse(keys, fields, analyzer);
            return indexSearcher.search(query, 100);
        } catch (Exception e) {
            e.printStackTrace();
            return null;
        }
    }

    /**
     * Searches the index in {@code Constants.LUCENE_INDEX_STORE} and prints the
     * path of every matching file.
     *
     * <p>Note from the original author: searching Chinese text is not supported
     * by this demo.
     */
    public static void main(String[] args) {
        // The constructor initialises the static reader and searcher used below.
        new Search(Constants.LUCENE_INDEX_STORE);
        try {
            // Keywords to search for.
            String[] keys = {"lucene", "F"};
            // Fields to search in.
            String[] fields = {"text", "file_path"};

            TopDocs docs = getResult(keys, fields);
            if (docs == null) {
                // The failure has already been reported by getResult().
                return;
            }

            ScoreDoc[] scoreDocs = docs.scoreDocs;
            if (scoreDocs.length == 0) {
                System.out.println("没有找到内容...");
                return;
            }

            for (int i = 0; i < scoreDocs.length; i++) {
                try {
                    Document doc = indexSearcher.doc(scoreDocs[i].doc);
                    // Report a 1-based ordinal to the user.
                    System.out.print("这是第" + (i + 1) + "个检索到的结果，文件名为");
                    System.out.println(doc.get("file_path"));
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        } finally {
            try {
                indexReader.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }

}

3. 用到的两个目录常量（待索引文件所在目录和索引存储目录）：

package com.wa.xwolf.eap.search;

/**
 * Filesystem locations shared by the indexing and search demos.
 *
 * Created by Administrator on 2015/9/10.
 */
public final class Constants {

    /** Directory that contains the files to be indexed. */
    public static final String LUCENE_FILE_STORE = "F:\\lucene\\file";

    /** Directory in which the Lucene index is stored. */
    public static final String LUCENE_INDEX_STORE = "F:\\lucene\\index";

    /** Not instantiable: this class only holds constants. */
    private Constants() {
    }
}