四、分页查询
两种方式
分页查询是很常见的需求,比如要查询第10页,每页10条数据。
Lucene 分页通常来讲有两种方式:
第一种是把100条数据查出来,然后取最后10条。 优点是快,缺点是对内存消耗大。
第二种是先查出前90条,取其中的第90条作为游标,然后基于这一条,通过searchAfter方法查询它之后的10条数据。 优点是内存消耗小,缺点是比第一种更慢。
Lucene 分页通常来讲有两种方式:
第一种是把100条数据查出来,然后取最后10条。 优点是快,缺点是对内存消耗大。
第二种是先查出前90条,取其中的第90条作为游标,然后基于这一条,通过searchAfter方法查询它之后的10条数据。 优点是内存消耗小,缺点是比第一种更慢。
第一种
/**
 * Returns one page of hits by fetching the top {@code pageNow * pageSize} results and keeping
 * only the slice that belongs to the requested page.
 *
 * <p>The slice is clamped to the number of hits actually returned, so a partial last page yields
 * a shorter array and a page past the last hit yields an empty array, rather than failing with
 * an {@link ArrayIndexOutOfBoundsException}.
 *
 * @param query    the query to run
 * @param searcher the searcher to run the query against
 * @param pageNow  1-based number of the page to return
 * @param pageSize number of hits per page
 * @return the hits on the requested page, possibly fewer than {@code pageSize} or none
 * @throws IOException if the search fails
 * @throws IllegalArgumentException if {@code pageNow} or {@code pageSize} is less than 1
 */
private static ScoreDoc[] pageSearch1(Query query, IndexSearcher searcher, int pageNow, int pageSize)
        throws IOException {
    if (pageNow < 1 || pageSize < 1) {
        throw new IllegalArgumentException(
                "pageNow and pageSize must be >= 1, got pageNow=" + pageNow + ", pageSize=" + pageSize);
    }
    TopDocs topDocs = searcher.search(query, pageNow * pageSize);
    System.out.println("查询到的总条数\t" + topDocs.totalHits);
    ScoreDoc[] allScores = topDocs.scoreDocs;
    // The query may match fewer documents than were asked for, so clamp both ends of the
    // page window to what was actually returned.
    int start = Math.min((pageNow - 1) * pageSize, allScores.length);
    int end = Math.min(pageNow * pageSize, allScores.length);
    ScoreDoc[] hits = new ScoreDoc[end - start];
    System.arraycopy(allScores, start, hits, 0, hits.length);
    return hits;
}
一共查出 pageNow*pageSize条,然后取最后pageSize条
|
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
|
package com.how2java;import java.io.IOException;import java.io.StringReader;import java.util.ArrayList;import java.util.List;import org.apache.lucene.analysis.TokenStream;import org.apache.lucene.document.Document;import org.apache.lucene.document.Field;import org.apache.lucene.document.TextField;import org.apache.lucene.index.DirectoryReader;import org.apache.lucene.index.IndexReader;import org.apache.lucene.index.IndexWriter;import org.apache.lucene.index.IndexWriterConfig;import org.apache.lucene.index.IndexableField;import org.apache.lucene.queryparser.classic.QueryParser;import org.apache.lucene.search.IndexSearcher;import org.apache.lucene.search.Query;import org.apache.lucene.search.ScoreDoc;import org.apache.lucene.search.TopDocs;import org.apache.lucene.search.highlight.Highlighter;import org.apache.lucene.search.highlight.QueryScorer;import org.apache.lucene.search.highlight.SimpleHTMLFormatter;import org.apache.lucene.store.Directory;import org.apache.lucene.store.RAMDirectory;import org.wltea.analyzer.lucene.IKAnalyzer;public class TestLucene { public static void main(String[] args) throws Exception { // 1. 准备中文分词器 IKAnalyzer analyzer = new IKAnalyzer(); // 2. 索引 Directory index = createIndex(analyzer); // 3. 查询器 String keyword = "手机"; System.out.println("当前关键字是:"+keyword); Query query = new QueryParser( "name", analyzer).parse(keyword); // 4. 搜索 IndexReader reader = DirectoryReader.open(index); IndexSearcher searcher=new IndexSearcher(reader); int pageNow = 1; int pageSize = 10; ScoreDoc[] hits = pageSearch1(query, searcher, pageNow, pageSize); // 5. 显示查询结果 showSearchResults(searcher, hits,query,analyzer); // 6. 
关闭查询 reader.close(); } private static ScoreDoc[] pageSearch1(Query query, IndexSearcher searcher, int pageNow, int pageSize) throws IOException { TopDocs topDocs = searcher.search(query, pageNow*pageSize); System.out.println("查询到的总条数\t"+topDocs.totalHits); ScoreDoc [] alllScores = topDocs.scoreDocs; List<ScoreDoc> hitScores = new ArrayList<>(); int start = (pageNow -1)*pageSize ; int end = pageSize*pageNow; for(int i=start;i<end;i++) hitScores.add(alllScores[i]); ScoreDoc[] hits = hitScores.toArray(new ScoreDoc[]{}); return hits; } private static void showSearchResults(IndexSearcher searcher, ScoreDoc[] hits, Query query, IKAnalyzer analyzer)throws Exception { System.out.println("找到 " + hits.length + " 个命中."); SimpleHTMLFormatter simpleHTMLFormatter = new SimpleHTMLFormatter("<span style='color:red'>", "</span>"); Highlighter highlighter = new Highlighter(simpleHTMLFormatter, new QueryScorer(query)); System.out.println("找到 " + hits.length + " 个命中."); System.out.println("序号\t匹配度得分\t结果"); for (int i = 0; i < hits.length; ++i) { ScoreDoc scoreDoc= hits[i]; int docId = scoreDoc.doc; Document d = searcher.doc(docId); List<IndexableField> fields= d.getFields(); System.out.print((i + 1) ); System.out.print("\t" + scoreDoc.score); for (IndexableField f : fields) { if("name".equals(f.name())){ TokenStream tokenStream = analyzer.tokenStream(f.name(), new StringReader(d.get(f.name()))); String fieldContent = highlighter.getBestFragment(tokenStream, d.get(f.name())); System.out.print("\t"+fieldContent); } else{ System.out.print("\t"+d.get(f.name())); } } System.out.println("<br>"); } } private static Directory createIndex(IKAnalyzer analyzer) throws IOException { Directory index = new RAMDirectory(); IndexWriterConfig config = new IndexWriterConfig(analyzer); IndexWriter writer = new IndexWriter(index, config); String fileName = "140k_products.txt"; List<Product> products = ProductUtil.file2list(fileName); int total = products.size(); int count = 0; int per = 0; int oldPer =0; 
for (Product p : products) { addDoc(writer, p); count++; per = count*100/total; if(per!=oldPer){ oldPer = per; System.out.printf("索引中,总共要添加 %d 条记录,当前添加进度是: %d%% %n",total,per); } if(per>10) break; } writer.close(); return index; } private static void addDoc(IndexWriter w, Product p) throws IOException { Document doc = new Document(); doc.add(new TextField("id", String.valueOf(p.getId()), Field.Store.YES)); doc.add(new TextField("name", p.getName(), Field.Store.YES)); doc.add(new TextField("category", p.getCategory(), Field.Store.YES)); doc.add(new TextField("price", String.valueOf(p.getPrice()), Field.Store.YES)); doc.add(new TextField("place", p.getPlace(), Field.Store.YES)); doc.add(new TextField("code", p.getCode(), Field.Store.YES)); w.addDocument(doc); }} |
/**
 * Returns one page of hits using {@code searchAfter}: the hits that precede the page are
 * fetched first, and the last of them is used as the cursor for fetching the page itself.
 *
 * <p>If the query has fewer hits than precede the requested page, the page lies past the end
 * and an empty array is returned, rather than failing with an
 * {@link ArrayIndexOutOfBoundsException}.
 *
 * @param query    the query to run
 * @param searcher the searcher to run the query against
 * @param pageNow  1-based number of the page to return
 * @param pageSize number of hits per page
 * @return the hits on the requested page, possibly fewer than {@code pageSize} or none
 * @throws IOException if a search fails
 * @throws IllegalArgumentException if {@code pageNow} or {@code pageSize} is less than 1
 */
private static ScoreDoc[] pageSearch2(Query query, IndexSearcher searcher, int pageNow, int pageSize)
        throws IOException {
    if (pageNow < 1 || pageSize < 1) {
        throw new IllegalArgumentException(
                "pageNow and pageSize must be >= 1, got pageNow=" + pageNow + ", pageSize=" + pageSize);
    }
    int start = (pageNow - 1) * pageSize;
    if (start == 0) {
        // First page: there is no previous hit to continue from, so search directly.
        return searcher.search(query, pageSize).scoreDocs;
    }
    // Fetch every hit that precedes the requested page; only the last one is needed.
    ScoreDoc[] preceding = searcher.search(query, start).scoreDocs;
    if (preceding.length < start) {
        // Fewer hits exist than precede this page, so the page is past the end.
        return new ScoreDoc[0];
    }
    // The last hit of the previous page is the cursor for searchAfter.
    ScoreDoc lastOfPreviousPage = preceding[start - 1];
    // Fetch one page worth of hits that rank after the cursor.
    return searcher.searchAfter(lastOfPreviousPage, query, pageSize).scoreDocs;
}
首先是边界条件,如果是第一页,就直接查询了。
如果不是第一页,那么就取start-1那一条(也就是上一页的最后一条),然后再根据它通过searchAfter来查询本页的数据。
|
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
|
package com.how2java;import java.io.IOException;import java.io.StringReader;import java.util.ArrayList;import java.util.List;import org.apache.lucene.analysis.TokenStream;import org.apache.lucene.document.Document;import org.apache.lucene.document.Field;import org.apache.lucene.document.TextField;import org.apache.lucene.index.DirectoryReader;import org.apache.lucene.index.IndexReader;import org.apache.lucene.index.IndexWriter;import org.apache.lucene.index.IndexWriterConfig;import org.apache.lucene.index.IndexableField;import org.apache.lucene.queryparser.classic.QueryParser;import org.apache.lucene.search.IndexSearcher;import org.apache.lucene.search.Query;import org.apache.lucene.search.ScoreDoc;import org.apache.lucene.search.TopDocs;import org.apache.lucene.search.highlight.Highlighter;import org.apache.lucene.search.highlight.QueryScorer;import org.apache.lucene.search.highlight.SimpleHTMLFormatter;import org.apache.lucene.store.Directory;import org.apache.lucene.store.RAMDirectory;import org.wltea.analyzer.lucene.IKAnalyzer;public class TestLucene { public static void main(String[] args) throws Exception { // 1. 准备中文分词器 IKAnalyzer analyzer = new IKAnalyzer(); // 2. 索引 Directory index = createIndex(analyzer); // 3. 查询器 String keyword = "手机"; System.out.println("当前关键字是:"+keyword); Query query = new QueryParser( "name", analyzer).parse(keyword); // 4. 搜索 IndexReader reader = DirectoryReader.open(index); IndexSearcher searcher=new IndexSearcher(reader); int pageNow = 1; int pageSize = 10; ScoreDoc[] hits = pageSearch2(query, searcher, pageNow, pageSize); // 5. 显示查询结果 showSearchResults(searcher, hits,query,analyzer); // 6. 
关闭查询 reader.close(); } private static ScoreDoc[] pageSearch1(Query query, IndexSearcher searcher, int pageNow, int pageSize) throws IOException { TopDocs topDocs = searcher.search(query, pageNow*pageSize); System.out.println("查询到的总条数\t"+topDocs.totalHits); ScoreDoc [] alllScores = topDocs.scoreDocs; List<ScoreDoc> hitScores = new ArrayList<>(); int start = (pageNow -1)*pageSize ; int end = pageSize*pageNow; for(int i=start;i<end;i++) hitScores.add(alllScores[i]); ScoreDoc[] hits = hitScores.toArray(new ScoreDoc[]{}); return hits; } private static ScoreDoc[] pageSearch2(Query query, IndexSearcher searcher, int pageNow, int pageSize) throws IOException { int start = (pageNow - 1) * pageSize; if(0==start){ TopDocs topDocs = searcher.search(query, pageNow*pageSize); return topDocs.scoreDocs; } // 查询数据, 结束页面自前的数据都会查询到,但是只取本页的数据 TopDocs topDocs = searcher.search(query, start); //获取到上一页最后一条 ScoreDoc preScore= topDocs.scoreDocs[start-1]; //查询最后一条后的数据的一页数据 topDocs = searcher.searchAfter(preScore, query, pageSize); return topDocs.scoreDocs; } private static void showSearchResults(IndexSearcher searcher, ScoreDoc[] hits, Query query, IKAnalyzer analyzer)throws Exception { System.out.println("找到 " + hits.length + " 个命中."); SimpleHTMLFormatter simpleHTMLFormatter = new SimpleHTMLFormatter("<span style='color:red'>", "</span>"); Highlighter highlighter = new Highlighter(simpleHTMLFormatter, new QueryScorer(query)); System.out.println("找到 " + hits.length + " 个命中."); System.out.println("序号\t匹配度得分\t结果"); for (int i = 0; i < hits.length; ++i) { ScoreDoc scoreDoc= hits[i]; int docId = scoreDoc.doc; Document d = searcher.doc(docId); List<IndexableField> fields= d.getFields(); System.out.print((i + 1) ); System.out.print("\t" + scoreDoc.score); for (IndexableField f : fields) { if("name".equals(f.name())){ TokenStream tokenStream = analyzer.tokenStream(f.name(), new StringReader(d.get(f.name()))); String fieldContent = highlighter.getBestFragment(tokenStream, d.get(f.name())); 
System.out.print("\t"+fieldContent); } else{ System.out.print("\t"+d.get(f.name())); } } System.out.println("<br>"); } } private static Directory createIndex(IKAnalyzer analyzer) throws IOException { Directory index = new RAMDirectory(); IndexWriterConfig config = new IndexWriterConfig(analyzer); IndexWriter writer = new IndexWriter(index, config); String fileName = "140k_products.txt"; List<Product> products = ProductUtil.file2list(fileName); int total = products.size(); int count = 0; int per = 0; int oldPer =0; for (Product p : products) { addDoc(writer, p); count++; per = count*100/total; if(per!=oldPer){ oldPer = per; System.out.printf("索引中,总共要添加 %d 条记录,当前添加进度是: %d%% %n",total,per); } if(per>10) break; } writer.close(); return index; } private static void addDoc(IndexWriter w, Product p) throws IOException { Document doc = new Document(); doc.add(new TextField("id", String.valueOf(p.getId()), Field.Store.YES)); doc.add(new TextField("name", p.getName(), Field.Store.YES)); doc.add(new TextField("category", p.getCategory(), Field.Store.YES)); doc.add(new TextField("price", String.valueOf(p.getPrice()), Field.Store.YES)); doc.add(new TextField("place", p.getPlace(), Field.Store.YES)); doc.add(new TextField("code", p.getCode(), Field.Store.YES)); w.addDocument(doc); }} |
文件下载:http://download.how2j.cn/1711/lucene.rar
浙公网安备 33010602011771号