lucene最新版本3.3的基本功能用法

Lucene 的最新版本是 3.3,其用法与 2.x 截然不同,很多内容在网上都搜不到,所以我简单地整理了一下 3.3 的基本功能用法,
包括:建立索引、查询等。

  1 /**
  2  * @作者 iceting
  3  * @日期 2011-7-28
  4  * @时间上午15:44:24
  5  * @描述
  6  * @版本
  7  */
  8 package test.lucene;
  9
10 import java.io.BufferedInputStream;
11 import java.io.File;
12 import java.io.FileInputStream;
13 import java.io.FileNotFoundException;
14 import java.io.IOException;
15 import java.io.StringReader;
16
17 import org.apache.lucene.analysis.Analyzer;
18 import org.apache.lucene.analysis.TokenStream;
19 import org.apache.lucene.analysis.cjk.CJKAnalyzer;
20 import org.apache.lucene.analysis.standard.StandardAnalyzer;
21 import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
22 import org.apache.lucene.analysis.tokenattributes.TermAttribute;
23 import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
24 import org.apache.lucene.document.Document;
25 import org.apache.lucene.document.Field;
26 import org.apache.lucene.document.Field.Index;
27 import org.apache.lucene.document.Field.Store;
28 import org.apache.lucene.index.IndexWriter;
29 import org.apache.lucene.index.IndexWriterConfig;
30 import org.apache.lucene.queryParser.MultiFieldQueryParser;
31 import org.apache.lucene.queryParser.QueryParser;
32 import org.apache.lucene.search.IndexSearcher;
33 import org.apache.lucene.search.Query;
34 import org.apache.lucene.search.TopDocs;
35 import org.apache.lucene.search.highlight.Formatter;
36 import org.apache.lucene.search.highlight.Fragmenter;
37 import org.apache.lucene.search.highlight.Highlighter;
38 import org.apache.lucene.search.highlight.QueryScorer;
39 import org.apache.lucene.search.highlight.Scorer;
40 import org.apache.lucene.search.highlight.SimpleFragmenter;
41 import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
42 import org.apache.lucene.store.Directory;
43 import org.apache.lucene.store.FSDirectory;
44 import org.apache.lucene.store.RAMDirectory;
45 import org.apache.lucene.util.Version;
46
47 import com.chenlb.mmseg4j.analysis.SimpleAnalyzer;
48
49 /**
50  * @作者 loyal
51  * @日期 2011-7-15
52  * @时间上午10:44:24
53  * @描述
54  * @版本 v1.0
55  */
56 public class Test {
57
58
59     /**
60      * @描述
61      * @作者 loyal
62      * @日期 2011-7-15
63      * @时间上午10:44:24
64      * @param args
65      */
66     static Version matchVersion=Version.LUCENE_33;
67     static String indexPath ="C:\\index";
68     static String filePath = "files/testss.txt";
69     static Analyzer analyzer=new StandardAnalyzer(matchVersion);
70     static Analyzer a3=new CJKAnalyzer(matchVersion);//二分法分词
71     static Analyzer a4=new SimpleAnalyzer();//中文分词器mmseg4j中提供的一种分词器
72
73     public static byte[] getFileBytes(File file) {
74         byte[] res = null;
75         try {
76             FileInputStream fis = new FileInputStream(file);
77             BufferedInputStream bis = new BufferedInputStream(fis);
78             byte[] b = new byte[1024];
79             int len = 0;
80             while ((len = bis.read(b)) != -1) {
81                 res = addByte(res, b, len);
82             }
83             bis.close();
84             fis.close();
85             return res;
86         } catch (FileNotFoundException e) {
87             // TODO Auto-generated catch block
88             e.printStackTrace();
89         } catch (IOException e) {
90             // TODO Auto-generated catch block
91             e.printStackTrace();
92         }
93
94         return null;
95     }
96
97     public static byte[] addByte(byte[] array1, byte[] array2, int len) {
98         if (array1 == null && array2 == null) {
99             System.out.println("数组均为null,返回null!!!");
100             return null;
101         }
102         byte[] t = new byte[array1 == null ? len : array2 == null ? array1.length : (array1.length + len)];
103         if (array1 != null && array2 == null) {
104             System.arraycopy(array1, 0, t, 0, len > array1.length ? array1.length : len);
105             return t;
106         }
107         if (array1 == null) {
108             // t = Arrays.copyOfRange(array2, 0, len);//jdk 1.6
109             System.arraycopy(array2, 0, t, 0, len > array2.length ? array2.length : len);
110             return t;
111         }
112         System.arraycopy(array1, 0, t, 0, array1.length);// copy array1
113         System.arraycopy(array2, 0, t, array1.length, len);// copy array2
114         return t;
115     }
116
117     @org.junit.Test
118     public void testCreateIndex() throws Exception{
119         System.out.println(indexPath);
120         System.out.println(filePath);
121         Directory dir =FSDirectory.open(new File(indexPath)) ;
122         Document doc=new Document();
123         Field titleField=new Field("title",new File(filePath).getName(),Store.YES,Index.ANALYZED);
124         String content=new String(Test.getFileBytes(new File(filePath)));
125         Field contentField=new Field("content",content,Store.YES,Index.ANALYZED);
126         doc.add(titleField);
127         doc.add(contentField);
128         IndexWriterConfig iwc=new IndexWriterConfig(matchVersion, a4);
129         //默认create_or_append
130         iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE);//总是重新创建
131         IndexWriter iw=new IndexWriter(dir, iwc) ;
132         iw.addDocument(doc);
133         iw.close();
134
135     }
136     @org.junit.Test
137     public void testSearch() throws Exception{
138         Directory dir =FSDirectory.open(new File(indexPath),null) ;
139         IndexSearcher is=new IndexSearcher(dir);
140         System.out.println(is.maxDoc());
141
142         String[] fields={"title","content"};
143         QueryParser qp=new MultiFieldQueryParser(matchVersion, fields, a4);
144 //        QueryParser qp=new QueryParser(matchVersion, "content", analyzer);
145         Query query=qp.parse("汉字");
146         //System.out.println(query.toString("content"));
147         TopDocs tDocs=is.search(query,10000);//一次查询多少个结果
148         // 准备高亮器
149         Formatter formatter=new SimpleHTMLFormatter("<span class=\"highlighter\">","</span>");
150         Scorer fragmentScorer=new QueryScorer(query);
151         Highlighter highlighter=new Highlighter(formatter, fragmentScorer);
152         Fragmenter fragmenter=new SimpleFragmenter(100);//高亮范围
153         highlighter.setTextFragmenter(fragmenter);
154
155         int numTotalHits = tDocs.totalHits;
156         System.out.println("总共有【"+numTotalHits+"】条结果");
157         System.out.println(tDocs.scoreDocs.length);
158         //
159         // int  k = tDocs.scoreDocs[0].doc ; //文档内部编号
160         //Document doc = is.doc(k) ; //更具文档编号取出对应文档
161         Document doc = is.doc(0);
162         //doc.getField("content");//获取属性值,与下相同
163         String content = doc.get("content");//获取属性值
164         //如果当前属性值中没有出现关键字,则返回null
165         String hc=highlighter.getBestFragment(a4, "content", content);
166         System.out.println("hc:"+hc);
167         if(hc==null){//如果无结果那么返回原文的前50个字符
168             hc=content.substring(0,Math.min(50,content.length()));
169         //    Field contentField=doc.getFieldable("content");
170         }
171         Field contentField=(Field) doc.getFieldable("content");
172         contentField.setValue(hc);
173 //        doc.getField("content").setValue(hc);
174         System.out.println(doc.get("content"));
175
176         TokenStream ts=a4.tokenStream("content", new StringReader(content));
177 //         System.out.println("token: "+ts.getAttribute(String.class).toString());
178         OffsetAttribute offsetAttribute = ts.getAttribute(OffsetAttribute.class);
179         TermAttribute termAttribute = ts.getAttribute(TermAttribute.class);
180         while (ts.incrementToken()) {
181             int startOffset = offsetAttribute.startOffset();
182             int endOffset = offsetAttribute.endOffset();
183             String term = termAttribute.term();
184             //System.out.println(term);
185         }
186     }
187     @org.junit.Test
188     public void testCreateRAMandFS() throws Exception{
189         Directory fsDir =FSDirectory.open(new File(indexPath)) ;
190         //1.将索引读取到内存中
191         Directory ramDir =new RAMDirectory(fsDir);
192         //2.填入文档
193         Document doc=new Document();
194         Field titleField=new Field("title",new File(filePath).getName(),Store.YES,Index.ANALYZED);
195         String content=new String(Test.getFileBytes(new File(filePath)));
196         Field contentField=new Field("content",content,Store.YES,Index.ANALYZED);
197         doc.add(titleField);
198         doc.add(contentField);
199         IndexWriterConfig ramiwc=new IndexWriterConfig(matchVersion, analyzer);
200         //默认create_or_append
201 //        ramiwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE);//总是重新创建
202         IndexWriter ramiw=new IndexWriter(ramDir, ramiwc) ;
203         ramiw.addDocument(doc);
204         ramiw.close();
205         //3.关闭时,写入到文件
206         IndexWriterConfig fsiwc=new IndexWriterConfig(matchVersion, analyzer);
207         //默认create_or_append
208         fsiwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE);//总是重新创建
209         IndexWriter fsiw=new IndexWriter(fsDir, fsiwc) ;
210         //将内存的索引文件加入到fsiw中
211         fsiw.addIndexes(ramDir);
212         fsiw.commit();
213         //优化索引文件(合并索引文件)
214         fsiw.optimize();
215         fsiw.close();
216         System.out.println("===执行完毕");
217     }
218 }

posted @ 2011-07-28 16:46 Iceting 阅读(447) 评论(0) 编辑收藏举报

会员力量，点亮园子希望

刷新页面返回顶部

Just Bug

lucene最新版本3.3的基本功能用法

公告