Lucene3.0 基本索引操作
2011-12-31 14:30 _9527 阅读(260) 评论(0) 收藏 举报

package demo.indexing;
import java.io.IOException;
import junit.framework.TestCase;
import org.apache.lucene.analysis.WhitespaceAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;
import util.TestUtil;
/**
 * JUnit 3 test case that exercises basic Lucene 3.0 index operations
 * (adding, counting, deleting, optimizing and updating documents) against
 * an in-memory {@link RAMDirectory}.
 *
 * <p>A fresh two-document index is built in {@link #setUp()} before every
 * test, so the tests do not depend on each other.
 */
public class IndexingTest extends TestCase {

    // Test data: parallel arrays, element i of each array belongs to document i.
    protected String[] ids = { "1", "2" };
    protected String[] unindexed = { "Netherlands", "Italy" };
    protected String[] unstored = { "Amsterdam has lots of bridges",
            "Venice has lots of canals" };
    protected String[] text = { "Amsterdam", "Venice" };

    /** Index storage shared by the writer, reader and searcher of one test. */
    private Directory directory;

    /**
     * Runs before every test: creates a new in-memory directory and indexes
     * one document per entry of {@link #ids}.
     *
     * @throws Exception if the index cannot be created or written
     */
    @Override
    protected void setUp() throws Exception {
        super.setUp();
        directory = new RAMDirectory();

        IndexWriter writer = getWriter();
        try {
            for (int i = 0; i < ids.length; i++) {
                Document doc = new Document();
                // "id": stored and indexed as a single un-tokenized term.
                doc.add(new Field("id", ids[i], Field.Store.YES,
                        Field.Index.NOT_ANALYZED));
                // "country": stored only, not searchable.
                doc.add(new Field("country", unindexed[i], Field.Store.YES,
                        Field.Index.NO));
                // "contents": tokenized and indexed, but not stored.
                doc.add(new Field("contents", unstored[i], Field.Store.NO,
                        Field.Index.ANALYZED));
                // "city": stored and tokenized.
                doc.add(new Field("city", text[i], Field.Store.YES,
                        Field.Index.ANALYZED));
                writer.addDocument(doc);
            }
        } finally {
            // Always close the writer, even if indexing fails part-way.
            writer.close();
        }
    }

    /**
     * Runs after every test: releases the in-memory directory.
     *
     * @throws Exception if closing the directory fails
     */
    @Override
    protected void tearDown() throws Exception {
        try {
            if (directory != null) {
                directory.close();
            }
        } finally {
            super.tearDown();
        }
    }

    /**
     * Creates an {@link IndexWriter} on {@link #directory}.
     *
     * @return a new writer; the caller is responsible for closing it
     * @throws IOException if the writer cannot be opened
     */
    private IndexWriter getWriter() throws IOException {
        return new IndexWriter(directory, // where the index is stored
                new WhitespaceAnalyzer(), // analyzer used to tokenize ANALYZED fields
                IndexWriter.MaxFieldLength.UNLIMITED); // do not truncate: index every token of a field
    }

    /**
     * Opens a new {@link IndexSearcher}, runs a single-term query and returns
     * the number of matching documents.
     *
     * @param fieldName    name of the field to search
     * @param searchString exact term to look for in that field
     * @return the hit count reported by {@code TestUtil.hitCount}
     * @throws IOException if the index cannot be opened or searched
     */
    protected int getHitCount(String fieldName, String searchString)
            throws IOException {
        IndexSearcher searcher = new IndexSearcher(directory);
        try {
            // Build a simple single-term query.
            Query query = new TermQuery(new Term(fieldName, searchString));
            // TestUtil.hitCount is a project helper; per the original author's
            // note it runs the search and returns the total number of matches.
            return TestUtil.hitCount(searcher, query);
        } finally {
            searcher.close();
        }
    }

    /** Verifies the number of documents seen through an {@link IndexWriter}. */
    public void testIndexWriter() throws IOException {
        IndexWriter writer = getWriter();
        try {
            assertEquals(ids.length, writer.numDocs());
        } finally {
            writer.close();
        }
    }

    /** Verifies the number of documents seen through an {@link IndexReader}. */
    public void testIndexReader() throws IOException {
        IndexReader reader = IndexReader.open(directory);
        try {
            assertEquals(ids.length, reader.maxDoc());
            assertEquals(ids.length, reader.numDocs());
        } finally {
            reader.close();
        }
    }

    /**
     * Deletes one document and commits without optimizing: the writer must
     * report deletions, with {@code maxDoc()} still 2 and {@code numDocs()} 1.
     */
    public void testDeleteBeforeOptimize() throws IOException {
        IndexWriter writer = getWriter();
        try {
            // Both documents are present initially.
            assertEquals(2, writer.numDocs());
            // Delete the first document.
            writer.deleteDocuments(new Term("id", "1"));
            writer.commit();
            // The document is flagged as deleted ...
            assertTrue(writer.hasDeletions());
            // ... so maxDoc still counts it while numDocs does not.
            assertEquals(2, writer.maxDoc());
            assertEquals(1, writer.numDocs());
        } finally {
            writer.close();
        }
    }

    /**
     * Deletes one document and then optimizes: afterwards the writer must
     * report no deletions, with both {@code maxDoc()} and {@code numDocs()} 1.
     */
    public void testDeleterAfterOptimize() throws IOException {
        IndexWriter writer = getWriter();
        try {
            assertEquals(2, writer.numDocs());
            writer.deleteDocuments(new Term("id", "1"));
            // Optimizing merges segments, which makes the deletion take effect.
            writer.optimize();
            writer.commit();
            // No pending deletions and exactly one document left.
            assertFalse(writer.hasDeletions());
            assertEquals(1, writer.maxDoc());
            assertEquals(1, writer.numDocs());
        } finally {
            writer.close();
        }
    }

    /**
     * Replaces the document with id "1" (Amsterdam) by a new one (Beijing)
     * and checks that only the new document can be found afterwards.
     */
    public void testUpdate() throws IOException {
        assertEquals(1, getHitCount("city", "Amsterdam"));

        IndexWriter writer = getWriter();
        try {
            // Build the replacement document for "Beijing".
            Document doc = new Document();
            doc.add(new Field("id", "1", Field.Store.YES,
                    Field.Index.NOT_ANALYZED));
            doc.add(new Field("country", "China", Field.Store.YES,
                    Field.Index.NO));
            doc.add(new Field("contents", "Beijing is Royal park",
                    Field.Store.NO, Field.Index.ANALYZED));
            doc.add(new Field("city", "Beijing", Field.Store.YES,
                    Field.Index.ANALYZED));
            // Replace every document whose "id" term is "1" with the new one.
            writer.updateDocument(new Term("id", "1"), doc);
        } finally {
            // Closing the writer is what makes the update visible to the
            // searchers opened below.
            writer.close();
        }

        // The old document is gone.
        assertEquals(0, getHitCount("city", "Amsterdam"));
        // The new document has been indexed.
        assertEquals(1, getHitCount("city", "Beijing"));
    }
}
/*
 * 添加文档的方法:
 *   1. addDocument(Document): 使用默认的分析器添加文档,该分析器在创建IndexWriter对象时指定,用于语汇单元化操作
 *   2. addDocument(Document, Analyzer): 使用指定的分析器添加文档和语汇单元化操作
 *
 * 删除文档的方法:
 *   1. deleteDocuments(Term): 删除包含项的所有文档
 *   2. deleteDocuments(Term[]): 删除包含项数组任一元素的所有文档
 *   3. deleteDocuments(Query): 删除匹配查询语句的所有文档
 *   4. deleteDocuments(Query[]): 删除匹配查询语句数组任一元素的所有文档
 *   5. deleteAll(): 删除索引中的所有文档
 *
 * 更新文档的方法:
 *   1. updateDocument(Term, Document): 首先删除包含Term变量的所有文档,然后使用writer的默认分析器添加新文档
 *   2. updateDocument(Term, Document, Analyzer): 功能和上述一致,区别在于它可以指定分析器添加文档
 */
下载:
浙公网安备 33010602011771号