1.
package com.home.utils;

import java.util.ArrayList;
import java.util.List;

import org.apache.lucene.document.Document;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MultiPhraseQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.util.Version;
import org.junit.Test;

/**
 * Exploratory test that prints the relevance score of every hit for a fixed
 * query, so the ordering of results can be inspected by eye.
 *
 * <p>Observations recorded by the original author:
 * <ol>
 *   <li>Same document structure and same keyword: the scores are equal.</li>
 *   <li>Same structure but different keywords: the scores differ; Chinese
 *       keywords generally score higher.</li>
 *   <li>Different structure: the more often the keyword occurs, the higher
 *       the score.</li>
 *   <li>{@code document.setBoost(100)} can be used to raise the relevance
 *       score artificially.</li>
 * </ol>
 */
public class SortTest {

    /** Maximum number of hits requested from the searcher. */
    private static final int MAX_HITS = 26;

    /**
     * Searches the "title" and "content" fields for "lucene", prints each
     * hit's score, then prints the id, title and content of every hit.
     *
     * @throws Exception if the index cannot be opened, the query cannot be
     *                   parsed, or a document cannot be loaded
     */
    @Test
    public void testSearchIndex() throws Exception {
        IndexSearcher indexSearcher = new IndexSearcher(LuceneUtils.directory);
        try {
            QueryParser queryParser = new MultiFieldQueryParser(Version.LUCENE_30,
                    new String[] { "title", "content" }, LuceneUtils.analyzer);

            Query query = queryParser.parse("lucene");
            TopDocs topDocs = indexSearcher.search(query, MAX_HITS);

            ScoreDoc[] scoreDocs = topDocs.scoreDocs;
            List<Article> articleList = new ArrayList<Article>(scoreDocs.length);
            for (ScoreDoc scoreDoc : scoreDocs) {
                // Relevance score of this hit.
                System.out.println(scoreDoc.score);
                Document document = indexSearcher.doc(scoreDoc.doc);
                articleList.add(DocumentUtils.document2Article(document));
            }

            for (Article article : articleList) {
                System.out.println(article.getId());
                System.out.println(article.getTitle());
                System.out.println(article.getContent());
            }
        } finally {
            // The searcher was built from a Directory, so it owns the reader it
            // opened; closing it here releases that reader even when the test fails.
            indexSearcher.close();
        }
    }
}
2.
package com.home.utils;

import java.util.ArrayList;
import java.util.List;

import org.apache.lucene.document.Document;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.highlight.Formatter;
import org.apache.lucene.search.highlight.Fragmenter;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.Scorer;
import org.apache.lucene.search.highlight.SimpleFragmenter;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.apache.lucene.util.Version;
import org.junit.Test;

/**
 * Demonstrates search-result highlighting: wrapping matched keywords in
 * colouring markup (how to configure and apply the highlighter) and
 * controlling the size of the generated summary.
 */
public class HighlighterTest {

    /** Maximum number of hits requested from the searcher. */
    private static final int MAX_HITS = 25;

    /** Fragment size passed to {@link SimpleFragmenter}; bounds the summary length. */
    private static final int FRAGMENT_SIZE = 10;

    /**
     * Searches the "title" and "content" fields for "Lucene", replaces each
     * hit's title and content with a highlighted fragment when one is
     * available, and prints the score, id, title and content of every hit.
     *
     * @throws Exception if the index cannot be opened, the query cannot be
     *                   parsed, a document cannot be loaded, or highlighting fails
     */
    @Test
    public void testSearchIndex() throws Exception {
        IndexSearcher indexSearcher = new IndexSearcher(LuceneUtils.directory);
        try {
            QueryParser queryParser = new MultiFieldQueryParser(Version.LUCENE_30,
                    new String[] { "title", "content" }, LuceneUtils.analyzer);
            Query query = queryParser.parse("Lucene");
            TopDocs topDocs = indexSearcher.search(query, MAX_HITS);
            ScoreDoc[] scoreDocs = topDocs.scoreDocs;

            // Prefix and suffix wrapped around every matched keyword.
            Formatter formatter = new SimpleHTMLFormatter("<font color='red'>",
                    "</font>");

            // The scorer carries the query terms that should be highlighted.
            Scorer scorer = new QueryScorer(query);

            Highlighter highlighter = new Highlighter(formatter, scorer);

            // The fragmenter slices the text into pieces; the best piece becomes the summary.
            Fragmenter fragmenter = new SimpleFragmenter(FRAGMENT_SIZE);
            highlighter.setTextFragmenter(fragmenter);

            List<Article> articleList = new ArrayList<Article>(scoreDocs.length);
            for (ScoreDoc scoreDoc : scoreDocs) {
                // Relevance score of this hit.
                System.out.println(scoreDoc.score);
                Document document = indexSearcher.doc(scoreDoc.doc);
                Article article = DocumentUtils.document2Article(document);

                // getBestFragment takes (1) the analyzer used to find the keywords,
                // (2) the name of the field being highlighted and (3) that field's text.
                // A field that is not stored yields null from document.get(), and passing
                // null on would throw, so such fields are skipped and keep their
                // un-highlighted value.
                String storedTitle = document.get("title");
                String titleText = storedTitle == null ? null
                        : highlighter.getBestFragment(LuceneUtils.analyzer, "title", storedTitle);

                String storedContent = document.get("content");
                String contentText = storedContent == null ? null
                        : highlighter.getBestFragment(LuceneUtils.analyzer, "content", storedContent);

                // getBestFragment returns null when the text contains no query term;
                // in that case the original field value is left in place.
                if (titleText != null) {
                    article.setTitle(titleText);
                }
                if (contentText != null) {
                    article.setContent(contentText);
                }
                articleList.add(article);
            }

            for (Article article : articleList) {
                System.out.println(article.getId());
                System.out.println(article.getTitle());
                System.out.println(article.getContent());
            }
        } finally {
            // The searcher was built from a Directory, so it owns the reader it
            // opened; closing it here releases that reader even when the test fails.
            indexSearcher.close();
        }
    }
}