Lucene3.5自学系列1-建立索引

备注：

Field.Store.YES 标志该数据源（字段）的原始值被存储在索引中，如果为 NO 则不存储。

Field.Index.ANALYZED 标志该数据源（字段）被索引，如果为 NO 则不索引。注意：这里被索引的是字段值经过分词器（Analyzer）处理后的结果。

API解释为：

Enum Constant Summary
`ANALYZED` Index the tokens produced by running the field's value through an Analyzer.
`ANALYZED_NO_NORMS` Expert: Index the tokens produced by running the field's value through an Analyzer, and also separately disable the storing of norms.
`NO` Do not index the field value.
`NOT_ANALYZED` Index the field's value without using an Analyzer, so it can be searched.
`NOT_ANALYZED_NO_NORMS` Expert: Index the field's value without an Analyzer, and also disable the indexing of norms.

在3.5以前的版本中，Field 还有单独设置是否分词的选项。

//////////////////////////////////////源码//////////////////////////////////////

 1 package cn.swust.lucene;
 2 
 3 import java.io.File;
 4 import java.io.IOException;
 5 
 6 import org.apache.lucene.analysis.Analyzer;
 7 import org.apache.lucene.analysis.standard.StandardAnalyzer;
 8 import org.apache.lucene.document.Document;
 9 import org.apache.lucene.document.Field;
10 import org.apache.lucene.index.IndexWriter;
11 import org.apache.lucene.index.IndexWriterConfig;
12 import org.apache.lucene.index.IndexWriterConfig.OpenMode;
13 import org.apache.lucene.store.Directory;
14 import org.apache.lucene.store.FSDirectory;
15 import org.apache.lucene.util.Version;
16 
17 import cn.swust.tool.ToolUtil;
18 
19 /**
20  * 创建文档索引类
21  * @author qingfeideyi
22  *
23  */
24 public class IndexProcessor {
25     private String INDEX_STORe_PATH = "/home/qingfeideyi/文档/indexstore/";
26     public IndexProcessor(){};
27     public IndexProcessor(String index_path)
28     {
29         this.INDEX_STORe_PATH = index_path;
30     }
31     
32     //创建索引
33     public void createIndex(String inputDir) throws IOException
34     {
35         boolean isCreate = true;
36         //待创建得文档目录
37         Directory dir = FSDirectory.open(new File(INDEX_STORe_PATH));
38         //选择得分词工具
39         Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_34);
40         //建立索引的配置类，包含了一个解析器
41         IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_34, analyzer);
42          //设置我们的解析器是新建还是追加更新
43         if(isCreate){
44             iwc.setOpenMode(OpenMode.CREATE);//每次建立都覆盖原来的索引
45         }
46         else
47             iwc.setOpenMode(OpenMode.CREATE_OR_APPEND);//每次都追加更新
48 //索引的建立类 第一个参数索引的存放位置，第二个参数索引的配置对象
49         IndexWriter writer = new IndexWriter(dir, iwc);
50         
51         File fileDir = new File(inputDir);
52         File[] files = fileDir.listFiles();
53         for (int i = 0; i < files.length; i++) 
54         {
55             String fileName = files[i].getName();
56             if(fileName.endsWith(".txt"))
57             {
58                 System.out.println(fileName);
59                 Document doc = new Document();
60                 Field f0 = new Field("FileName", fileName, Field.Store.YES, Field.Index.ANALYZED);
61                 doc.add(f0);
62                 Field f1 = new Field("Content",ToolUtil.loadFileToString(files[i]),Field.Store.YES,Field.Index.NOT_ANALYZED);
63                 doc.add(f1);
64                 /**
65                  * 同理还可以建立其他的Field：如时间，文件大小，等等字段
66 */
67                 writer.addDocument(doc);
68                 writer.close();
69             }
70             //这个方法在新增索引的情况会很有用，就是讲原来散落的索引文件重新进行整理合并！
71 //  writer.forceMerge(1);
72         }
73     }
74     public static void main(String[] args) {
75         String inputPath = "/home/qingfeideyi/文档/lucene";
76         IndexProcessor ip = new IndexProcessor();
77         try {
78             ip.createIndex(inputPath);
79         } catch (IOException e) {
80             // TODO Auto-generated catch block
81             e.printStackTrace();
82         }
83     }
84 }

posted @ 2012-02-28 21:23 情非得已swust 阅读(826) 评论(0) 编辑收藏举报

情非得已swust

Lucene3.5自学系列1-建立索引

公告