lucene 建立CRUD操作

IndexSearcher indexSearcher = new IndexSearcher(LuceneUtils.getDirectory()); // 指定所用的索引库
这句会引发线程安全问题，在全局中 IndexSearcher 只能有一个对象才可以，所以在 ArticleDocumentUtils 中保存一个，并且引用它。
IndexWriter 为了提高效率，也是在内存中有缓存的，所以需要调用 commit 才能把更改写入索引库文件中。


数据库优化
  每次添加数据在索引文件夹下有很多小文件，为了合并小文件提高效率

//优化，合并多个小文件为一个大文件
LuceneUtils.getIndexWriter().optimize();


//配置当小文件的数量达到多少个后就自动合并为一个大文件，最小2，默认10
 LuceneUtils.getIndexWriter().setMergeFactor(3);
当增加数据的时候自动触发。

LuceneUtils.java

 1 package cn.itcast._util;
 2 
 3 import java.io.File;
 4 import java.io.IOException;
 5 
 6 import org.apache.lucene.analysis.Analyzer;
 7 import org.apache.lucene.analysis.standard.StandardAnalyzer;
 8 import org.apache.lucene.index.CorruptIndexException;
 9 import org.apache.lucene.index.IndexWriter;
10 import org.apache.lucene.index.IndexWriter.MaxFieldLength;
11 import org.apache.lucene.store.Directory;
12 import org.apache.lucene.store.FSDirectory;
13 import org.apache.lucene.store.LockObtainFailedException;
14 import org.apache.lucene.util.Version;
15 
16 public class LuceneUtils {
17 
18     private static Directory directory; // 索引库目录
19     private static Analyzer analyzer; // 分词器
20 
21     private static IndexWriter indexWriter;
22 
23     static {
24         try {
25             // 这里应是读取配置文件得到的索引库目录
26             directory = FSDirectory.open(new File("./indexDir"));
27             analyzer = new StandardAnalyzer(Version.LUCENE_30);
28         } catch (IOException e) {
29             throw new RuntimeException(e);
30         }
31     }
32 
33     /**
34      * 获取全局唯一的IndexWriter对象
35      * 
36      * @return
37      */
38     public static IndexWriter getIndexWriter() {
39         // 在第一次使用IndexWriter是进行初始化
40         if (indexWriter == null) {
41             synchronized (LuceneUtils.class) { // 注意线程安全问题
42                 if (indexWriter == null) {
43                     try {
44                         indexWriter = new IndexWriter(directory, analyzer, MaxFieldLength.LIMITED);
45                         System.out.println("=== 已经初始化 IndexWriter ===");
46                     } catch (Exception e) {
47                         throw new RuntimeException(e);
48                     }
49                 }
50             }
51 
52             // 指定一段代码，会在JVM退出之前执行。
53             Runtime.getRuntime().addShutdownHook(new Thread() {
54                 public void run() {
55                     try {
56                         indexWriter.close();
57                         System.out.println("=== 已经关闭 IndexWriter ===");
58                     } catch (Exception e) {
59                         throw new RuntimeException(e);
60                     }
61                 }
62             });
63         }
64 
65         return indexWriter;
66     }
67 
68     public static Directory getDirectory() {
69         return directory;
70     }
71 
72     public static Analyzer getAnalyzer() {
73         return analyzer;
74     }
75 
76 }

ArticleDocumentUtils.java

 1 package cn.itcast._util;
 2 
 3 import org.apache.lucene.document.Document;
 4 import org.apache.lucene.document.Field;
 5 import org.apache.lucene.document.Field.Index;
 6 import org.apache.lucene.document.Field.Store;
 7 import org.apache.lucene.util.NumericUtils;
 8 
 9 import cn.itcast._domain.Article;
10 
11 public class ArticleDocumentUtils {
12 
13     /**
14      * 把Article转为Document
15      * 
16      * @param article
17      * @return
18      */
19     public static Document articleToDocument(Article article) {
20         Document doc = new Document();
21 
22         String idStr = NumericUtils.intToPrefixCoded(article.getId()); // 一定要使用Lucene的工具类把数字转为字符串！
23         
24         doc.add(new Field("id", idStr, Store.YES, Index.NOT_ANALYZED)); // 注意：唯一标示符一般选择Index.NOT_ANALYZED
25         doc.add(new Field("title", article.getTitle(), Store.YES, Index.ANALYZED));
26         doc.add(new Field("content", article.getContent(), Store.YES, Index.ANALYZED));
27 
28         return doc;
29     }
30 
31     /**
32      * 把Document转为Article
33      * 
34      * @param doc
35      * @return
36      */
37     public static Article documentToArticle(Document doc) {
38         Article article = new Article();
39         
40         Integer id = NumericUtils.prefixCodedToInt(doc.get("id")); // 一定要使用Lucene的工具类把字符串转为数字！
41         
42         article.setId(id);
43         article.setTitle(doc.get("title"));
44         article.setContent(doc.get("content"));
45         
46         return article;
47     }
48 
49 }

View Code

QueryResult.java

 1 package cn.itcast._domain;
 2 
 3 import java.util.List;
 4 
 5 public class QueryResult {
 6     private List list; // 一段数据列表
 7     private int count; // 总记录数
 8 
 9     public QueryResult(List list, int count) {
10         this.list = list;
11         this.count = count;
12     }
13 
14     public List getList() {
15         return list;
16     }
17 
18     public void setList(List list) {
19         this.list = list;
20     }
21 
22     public int getCount() {
23         return count;
24     }
25 
26     public void setCount(int count) {
27         this.count = count;
28     }
29 
30 }

View Code

ArticleIndexDao.java

  1 package cn.itcast.b_indexdao;
  2 
  3 import java.io.IOException;
  4 import java.util.ArrayList;
  5 import java.util.List;
  6 
  7 import org.apache.lucene.document.Document;
  8 import org.apache.lucene.index.Term;
  9 import org.apache.lucene.queryParser.MultiFieldQueryParser;
 10 import org.apache.lucene.queryParser.QueryParser;
 11 import org.apache.lucene.search.IndexSearcher;
 12 import org.apache.lucene.search.Query;
 13 import org.apache.lucene.search.TopDocs;
 14 import org.apache.lucene.util.NumericUtils;
 15 import org.apache.lucene.util.Version;
 16 
 17 import cn.itcast._domain.Article;
 18 import cn.itcast._domain.QueryResult;
 19 import cn.itcast._util.ArticleDocumentUtils;
 20 import cn.itcast._util.LuceneUtils;
 21 
 22 public class ArticleIndexDao {
 23 
 24     /**
 25      * 保存到索引库（建立索引）
 26      * 
 27      * @param article
 28      */
 29     public void save(Article article) {
 30         // 1，把Article转为Document
 31         Document doc = ArticleDocumentUtils.articleToDocument(article);
 32 
 33         // 2，添加到索引库中
 34         try {
 35             LuceneUtils.getIndexWriter().addDocument(doc); // 添加
 36             LuceneUtils.getIndexWriter().commit(); // 提交更改
 37         } catch (Exception e) {
 38             throw new RuntimeException(e);
 39         }
 40     }
 41 
 42     /**
 43      * 删除索引
 44      * 
 45      * Term ：某字段中出现的某一个关键词（在索引库的目录中）
 46      * 
 47      * @param id
 48      */
 49     public void delete(Integer id) {
 50         try {
 51             String idStr = NumericUtils.intToPrefixCoded(id); // 一定要使用Lucene的工具类把数字转为字符串！
 52             Term term = new Term("id", idStr);
 53 
 54             LuceneUtils.getIndexWriter().deleteDocuments(term); // 删除所有含有这个Term的Document
 55             LuceneUtils.getIndexWriter().commit(); // 提交更改
 56         } catch (Exception e) {
 57             throw new RuntimeException(e);
 58         }
 59     }
 60 
 61     /**
 62      * 更新索引
 63      * 
 64      * @param article
 65      */
 66     public void update(Article article) {
 67         try {
 68             Term term = new Term("id", NumericUtils.intToPrefixCoded(article.getId())); // 一定要使用Lucene的工具类把数字转为字符串！
 69             Document doc = ArticleDocumentUtils.articleToDocument(article);
 70 
 71             LuceneUtils.getIndexWriter().updateDocument(term, doc); // 更新就是先删除再添加
 72             LuceneUtils.getIndexWriter().commit(); // 提交更改
 73 
 74             // indexWriter.deleteDocuments(term);
 75             // indexWriter.addDocument(doc);
 76         } catch (Exception e) {
 77             throw new RuntimeException(e);
 78         }
 79     }
 80 
 81     /**
 82      * * 搜索   用于分页的
 83      * 
 84      * @param queryString
 85      *            查询条件
 86      * @param first
 87      *            从结果列表的哪个索引开始获取数据
 88      * @param max
 89      *            最多获取多少条数据（如果没有这么多，就把剩余的都返回）
 90      * 
 91      * @return 一段数据列表 + 符合条件的总记录数
 92      */
 93     public QueryResult search(String queryString, int first, int max) {
 94         IndexSearcher indexSearcher = null;
 95         try {
 96             // 1，把查询字符串转为Query对象（在title与content中查询）
 97             QueryParser queryParser = new MultiFieldQueryParser(Version.LUCENE_30, new String[] { "title", "content" }, LuceneUtils.getAnalyzer());
 98             Query query = queryParser.parse(queryString);
 99 
100             // 2，执行查询，得到中间结果
101             indexSearcher = new IndexSearcher(LuceneUtils.getDirectory());
102             TopDocs topDocs = indexSearcher.search(query, first + max); // 最多返回前n条数据，这里要计算好，要返回足够数量的数据
103             int count = topDocs.totalHits; // 符合条件的总记录数
104 
105             // 3，处理数据
106             List<Article> list = new ArrayList<Article>();
107             int endIndex = Math.min(first + max, topDocs.scoreDocs.length); // 计算结束的边界
108 
109             for (int i = first; i < endIndex; i++) { // 应只取一段数据
110                 // 根据内部编号获取真正的Document数据
111                 int docId = topDocs.scoreDocs[i].doc;
112                 Document doc = indexSearcher.doc(docId);
113                 // 把Document转换为Article
114                 Article article = ArticleDocumentUtils.documentToArticle(doc);
115                 list.add(article);
116             }
117 
118             // 4，封装结果并返回
119             return new QueryResult(list, count);
120 
121         } catch (Exception e) {
122             throw new RuntimeException(e);
123         } finally {
124             // 关闭IndexSearcher
125             if (indexSearcher != null) {
126                 try {
127                     indexSearcher.close();
128                 } catch (IOException e) {
129                     throw new RuntimeException(e);
130                 }
131             }
132         }
133     }
134 }

不分页的查询

LuceneUtils.getIndexWriter()

 1     public List<Article> searchArticle(String condition) {
 2         // 执行搜索
 3         List<Article> list = new ArrayList<Article>();
 4         IndexSearcher indexSearcher = null;
 5         try {
 6             // 1，把查询字符串转为Query对象（默认只从title中查询）
 7             QueryParser queryParser = new MultiFieldQueryParser(
 8                     Version.LUCENE_30, new String[] { "title", "content" },
 9                     LuceneUtils.getAnalyzer());
10             Query query = queryParser.parse(condition);
11 
12             // 2，执行查询，得到中间结果
13             //indexSearcher = new IndexSearcher(LuceneUtils.getDirectory()); // 指定所用的索引库，会引发线程安全问题
　　　　　　　　　indexSearcher=LuceneUtils.getIndexWriter();

14             TopDocs topDocs = indexSearcher.search(query, 1000); // 最多返回前n条结果
15             int count = topDocs.totalHits;
16             System.out.println("scoreDocs.length"+topDocs.scoreDocs.length);        //一样
17             System.out.println("count"+count);                //一样
18             ScoreDoc[] scoreDocs = topDocs.scoreDocs;
19 
20             // 3，处理结果
21             for (int i = 0; i < scoreDocs.length; i++) {
22                 ScoreDoc scoreDoc = scoreDocs[i];
23                 float score = scoreDoc.score; // 相关度得分
24                 int docId = scoreDoc.doc; // Document的内部编号
25 
26                 // 根据编号拿到Document数据
27                 Document document = indexSearcher.doc(docId);
28 
29                 // 把Document转为Article
30                 Article article=ArticleDocumentUtils.documentToArticle(document);
31 
32                 list.add(article);
33             }
34         } catch (Exception e) {
35             throw new RuntimeException();
36         } finally {
37             try {
38                 if (null != indexSearcher)
39                     indexSearcher.close();
40             } catch (Exception e) {
41                 e.printStackTrace();
42             }
43         }
44         return list;
45     }

ArticleIndexDaoTest.java

 1 package cn.itcast.b_indexdao;
 2 
 3 import java.util.List;
 4 
 5 import org.junit.Test;
 6 
 7 import cn.itcast._domain.Article;
 8 import cn.itcast._domain.QueryResult;
 9 
10 public class ArticleIndexDaoTest {
11 
12     private ArticleIndexDao indexDao = new ArticleIndexDao();
13 
14     @Test
15     public void testSave() {
16         // 准备数据
17         Article article = new Article();
18         article.setId(1);
19         article.setTitle("准备Lucene的开发环境");
20         article.setContent("如果信息检索系统在用户发出了检索请求后再去互联网上找答案，根本无法在有限的时间内返回结果。");
21 
22         // 放到索引库中
23         indexDao.save(article);
24     }
25 
26     @Test
27     public void testSave_25() {
28         for (int i = 1; i <= 25; i++) {
29             // 准备数据
30             Article article = new Article();
31             article.setId(i);
32             article.setTitle("准备Lucene的开发环境");
33             article.setContent("如果信息检索系统在用户发出了检索请求后再去互联网上找答案，根本无法在有限的时间内返回结果。");
34 
35             // 放到索引库中
36             indexDao.save(article);
37         }
38     }
39 
40     @Test
41     public void testDelete() {
42         indexDao.delete(1);
43     }
44 
45     @Test
46     public void testUpdate() {
47         // 准备数据
48         Article article = new Article();
49         article.setId(1);
50         article.setTitle("准备Lucene的开发环境");
51         article.setContent("这是更新后的内容");
52 
53         // 更新到索引库中
54         indexDao.update(article);
55     }
56     //用于分页的
57     @Test
58     public void testSearch() {
59         // 准备查询条件
60         String queryString = "lucene";
61         // String queryString = "hibernate";
62 
63         // 执行搜索
64         // QueryResult qr = indexDao.search(queryString, 0, 10000);
65 
66         // QueryResult qr = indexDao.search(queryString, 0, 10); // 第1页，每页10条
67         // QueryResult qr = indexDao.search(queryString, 10, 10); // 第2页，每页10条
68         QueryResult qr = indexDao.search(queryString, 20, 10); // 第3页，每页10条
69 
70         // 显示结果
71         System.out.println("总结果数：" + qr.getCount());
72         for (Article a : (List<Article>) qr.getList()) {
73             System.out.println("------------------------------");
74             System.out.println("id = " + a.getId());
75             System.out.println("title = " + a.getTitle());
76             System.out.println("content = " + a.getContent());
77         }
78     }
79 
80 }

不分页查询测试

 1 @Test
 2     public void testSearchArticle() {
 3         // 准备查询条件
 4         String queryString = "lucene的";
 5         // String queryString = "hibernate";
 6 
 7         // 执行搜索
 8         List<Article> list =dao.searchArticle(queryString);
 9         
10         // 显示结果
11         System.out.println("总结果数：" + list.size());
12         for (Article a : list) {
13             System.out.println("------------------------------");
14             System.out.println("id = " + a.getId());
15             System.out.println("title = " + a.getTitle());
16             System.out.println("content = " + a.getContent());
17         }
18     }

posted on 2014-06-18 20:31 wf110 阅读(1135) 评论(0) 收藏举报