Lucene3.5自学系列1-建立索引

备注:
Field.Store.YES 表示该数据源(字段)的原始值会被存储到索引中;如果为 NO,则不存储。
Field.Index.ANALYZED 表示该数据源(字段)会被索引;如果为 NO,则不索引。注意:这里被索引的是字段值经过分词器(Analyzer)处理之后得到的结果。
API解释为:
Enum Constant Summary
ANALYZED 
          Index the tokens produced by running the field's value through an Analyzer.
ANALYZED_NO_NORMS 
          Expert: Index the tokens produced by running the field's value through an Analyzer, and also separately disable the storing of norms.
NO 
          Do not index the field value.
NOT_ANALYZED 
          Index the field's value without using an Analyzer, so it can be searched.
NOT_ANALYZED_NO_NORMS 
          Expert: Index the field's value without an Analyzer, and also disable the indexing of norms.
在3.5以前的版本中,Field 还有是否分词的选项
//////////////////////////////////////源码//////////////////////////////////////
 1 package cn.swust.lucene;
2
3 import java.io.File;
4 import java.io.IOException;
5
6 import org.apache.lucene.analysis.Analyzer;
7 import org.apache.lucene.analysis.standard.StandardAnalyzer;
8 import org.apache.lucene.document.Document;
9 import org.apache.lucene.document.Field;
10 import org.apache.lucene.index.IndexWriter;
11 import org.apache.lucene.index.IndexWriterConfig;
12 import org.apache.lucene.index.IndexWriterConfig.OpenMode;
13 import org.apache.lucene.store.Directory;
14 import org.apache.lucene.store.FSDirectory;
15 import org.apache.lucene.util.Version;
16
17 import cn.swust.tool.ToolUtil;
18
19 /**
20 * 创建文档索引类
21 * @author qingfeideyi
22 *
23 */
24 public class IndexProcessor {
25 private String INDEX_STORe_PATH = "/home/qingfeideyi/文档/indexstore/";
26 public IndexProcessor(){};
27 public IndexProcessor(String index_path)
28 {
29 this.INDEX_STORe_PATH = index_path;
30 }
31
32 //创建索引
33 public void createIndex(String inputDir) throws IOException
34 {
35 boolean isCreate = true;
36 //待创建得文档目录
37 Directory dir = FSDirectory.open(new File(INDEX_STORe_PATH));
38 //选择得分词工具
39 Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_34);
40 //建立索引的配置类,包含了一个解析器
41 IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_34, analyzer);
42 //设置我们的解析器是新建还是追加更新
43 if(isCreate){
44 iwc.setOpenMode(OpenMode.CREATE);//每次建立都覆盖原来的索引
45 }
46 else
47 iwc.setOpenMode(OpenMode.CREATE_OR_APPEND);//每次都追加更新
48 //索引的建立类 第一个参数索引的存放位置,第二个参数索引的配置对象
49 IndexWriter writer = new IndexWriter(dir, iwc);
50
51 File fileDir = new File(inputDir);
52 File[] files = fileDir.listFiles();
53 for (int i = 0; i < files.length; i++)
54 {
55 String fileName = files[i].getName();
56 if(fileName.endsWith(".txt"))
57 {
58 System.out.println(fileName);
59 Document doc = new Document();
60 Field f0 = new Field("FileName", fileName, Field.Store.YES, Field.Index.ANALYZED);
61 doc.add(f0);
62 Field f1 = new Field("Content",ToolUtil.loadFileToString(files[i]),Field.Store.YES,Field.Index.NOT_ANALYZED);
63 doc.add(f1);
64 /**
65 * 同理还可以建立其他的Field:如时间,文件大小,等等字段
66 */
67 writer.addDocument(doc);
68 writer.close();
69 }
70 //这个方法在新增索引的情况会很有用,就是讲原来散落的索引文件重新进行整理合并!
71 // writer.forceMerge(1);
72 }
73 }
74 public static void main(String[] args) {
75 String inputPath = "/home/qingfeideyi/文档/lucene";
76 IndexProcessor ip = new IndexProcessor();
77 try {
78 ip.createIndex(inputPath);
79 } catch (IOException e) {
80 // TODO Auto-generated catch block
81 e.printStackTrace();
82 }
83 }
84 }


posted @ 2012-02-28 21:23  情非得已swust  阅读(826)  评论(0编辑  收藏  举报