四、分页查询

两种方式

分页查询是很常见的需求，比如要查询第10页，每页10条数据。
Lucene 分页通常来讲有两种方式：
第一种是把100条数据查出来，然后取最后10条。优点是快，缺点是对内存消耗大。
第二种是把第90条查询出来，然后基于这一条，通过searchAfter方法查询10条数据。优点是内存消耗小，缺点是比第一种更慢

第一种

private static ScoreDoc[] pageSearch1(Query query, IndexSearcher searcher, int pageNow, int pageSize)

throws IOException {

TopDocs topDocs = searcher.search(query, pageNow*pageSize);

System.out.println("查询到的总条数\t"+topDocs.totalHits);

ScoreDoc [] alllScores = topDocs.scoreDocs;

List<ScoreDoc> hitScores = new ArrayList<>();

int start = (pageNow -1)*pageSize ;

int end = pageSize*pageNow;

for(int i=start;i<end;i++)

hitScores.add(alllScores[i]);

ScoreDoc[] hits = hitScores.toArray(new ScoreDoc[]{});

return hits;

}

一共查出 pageNow*pageSize条，然后取最后pageSize条

100

101

102

103

104

105

106

107

108

109

110

111

112

113

114

115

116

117

118

119

120

121

122

123

124

125

126

127

128

129

130

131

132

133

134

135

136

137

138

139

140

141

142

package com.how2java;
 
import java.io.IOException;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.List;
 
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;
import org.wltea.analyzer.lucene.IKAnalyzer;
 
public class TestLucene {
 
    public static void main(String[] args) throws Exception {
        // 1. 准备中文分词器
        IKAnalyzer analyzer = new IKAnalyzer();
        // 2. 索引
        Directory index = createIndex(analyzer);
 
        // 3. 查询器
         
            String keyword = "手机";
            System.out.println("当前关键字是："+keyword);
            Query query = new QueryParser( "name", analyzer).parse(keyword);
 
            // 4. 搜索
            IndexReader reader = DirectoryReader.open(index);
            IndexSearcher searcher=new IndexSearcher(reader);
            int pageNow = 1;
            int pageSize = 10;
             
            ScoreDoc[] hits = pageSearch1(query, searcher, pageNow, pageSize);
             
            // 5. 显示查询结果
            showSearchResults(searcher, hits,query,analyzer);
            // 6. 关闭查询
            reader.close();
         
    }
 
    private static ScoreDoc[] pageSearch1(Query query, IndexSearcher searcher, int pageNow, int pageSize)
            throws IOException {
        TopDocs topDocs = searcher.search(query, pageNow*pageSize);
         System.out.println("查询到的总条数\t"+topDocs.totalHits);
         ScoreDoc [] alllScores = topDocs.scoreDocs;
 
         List<ScoreDoc> hitScores = new ArrayList<>();
         
         int start = (pageNow -1)*pageSize ;
         int end = pageSize*pageNow;
         for(int i=start;i<end;i++)
             hitScores.add(alllScores[i]);
         
         ScoreDoc[] hits = hitScores.toArray(new ScoreDoc[]{});
        return hits;
    }
     
    private static void showSearchResults(IndexSearcher searcher, ScoreDoc[] hits, Query query, IKAnalyzer analyzer)throws Exception {
        System.out.println("找到 " + hits.length + " 个命中.");
 
        SimpleHTMLFormatter simpleHTMLFormatter = new SimpleHTMLFormatter("<span style='color:red'>", "</span>");
        Highlighter highlighter = new Highlighter(simpleHTMLFormatter, new QueryScorer(query));
 
        System.out.println("找到 " + hits.length + " 个命中.");
        System.out.println("序号\t匹配度得分\t结果");
        for (int i = 0; i < hits.length; ++i) {
            ScoreDoc scoreDoc= hits[i];
            int docId = scoreDoc.doc;
            Document d = searcher.doc(docId);
            List<IndexableField> fields= d.getFields();
            System.out.print((i + 1) );
            System.out.print("\t" + scoreDoc.score);
            for (IndexableField f : fields) {
 
                if("name".equals(f.name())){
                    TokenStream tokenStream = analyzer.tokenStream(f.name(), new StringReader(d.get(f.name())));
                    String fieldContent = highlighter.getBestFragment(tokenStream, d.get(f.name()));
                    System.out.print("\t"+fieldContent);
                }
                else{
                    System.out.print("\t"+d.get(f.name()));
                }
            }
            System.out.println("<br>");
        }
    }
 
    private static Directory createIndex(IKAnalyzer analyzer) throws IOException {
        Directory index = new RAMDirectory();
        IndexWriterConfig config = new IndexWriterConfig(analyzer);
        IndexWriter writer = new IndexWriter(index, config);
        String fileName = "140k_products.txt";
        List<Product> products = ProductUtil.file2list(fileName);
        int total = products.size();
        int count = 0;
        int per = 0;
        int oldPer =0;
        for (Product p : products) {
            addDoc(writer, p);
            count++;
            per = count*100/total;
            if(per!=oldPer){
                oldPer = per;
                System.out.printf("索引中，总共要添加 %d 条记录，当前添加进度是： %d%% %n",total,per);
            }
             
            if(per>10)
                break;
             
        }
        writer.close();
        return index;
    }
 
    private static void addDoc(IndexWriter w, Product p) throws IOException {
        Document doc = new Document();
        doc.add(new TextField("id", String.valueOf(p.getId()), Field.Store.YES));
        doc.add(new TextField("name", p.getName(), Field.Store.YES));
        doc.add(new TextField("category", p.getCategory(), Field.Store.YES));
        doc.add(new TextField("price", String.valueOf(p.getPrice()), Field.Store.YES));
        doc.add(new TextField("place", p.getPlace(), Field.Store.YES));
        doc.add(new TextField("code", p.getCode(), Field.Store.YES));
        w.addDocument(doc);
    }
}

第二种

顶折

private static ScoreDoc[] pageSearch2(Query query, IndexSearcher searcher, int pageNow, int pageSize)

throws IOException {

int start = (pageNow - 1) * pageSize;

if(0==start){

TopDocs topDocs = searcher.search(query, pageNow*pageSize);

return topDocs.scoreDocs;

}

// 查询数据，结束页面自前的数据都会查询到，但是只取本页的数据

TopDocs topDocs = searcher.search(query, start);

//获取到上一页最后一条

ScoreDoc preScore= topDocs.scoreDocs[start-1];

//查询最后一条后的数据的一页数据

topDocs = searcher.searchAfter(preScore, query, pageSize);

return topDocs.scoreDocs;

}

首先是边界条件，如果是第一页，就直接查询了。
如果不是第一页，那么就取start-1那一条，然后再根据它通过searchAfter 来查询

100

101

102

103

104

105

106

107

108

109

110

111

112

113

114

115

116

117

118

119

120

121

122

123

124

125

126

127

128

129

130

131

132

133

134

135

136

137

138

139

140

141

142

143

144

145

146

147

148

149

150

151

152

153

154

155

156

157

158

159

160

161

162

package com.how2java;
 
import java.io.IOException;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.List;
 
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;
import org.wltea.analyzer.lucene.IKAnalyzer;
 
public class TestLucene {
 
    public static void main(String[] args) throws Exception {
        // 1. 准备中文分词器
        IKAnalyzer analyzer = new IKAnalyzer();
        // 2. 索引
        Directory index = createIndex(analyzer);
 
        // 3. 查询器
         
            String keyword = "手机";
            System.out.println("当前关键字是："+keyword);
            Query query = new QueryParser( "name", analyzer).parse(keyword);
 
            // 4. 搜索
            IndexReader reader = DirectoryReader.open(index);
            IndexSearcher searcher=new IndexSearcher(reader);
            int pageNow = 1;
            int pageSize = 10;
             
            ScoreDoc[] hits = pageSearch2(query, searcher, pageNow, pageSize);
             
            // 5. 显示查询结果
            showSearchResults(searcher, hits,query,analyzer);
            // 6. 关闭查询
            reader.close();
         
    }
 
    private static ScoreDoc[] pageSearch1(Query query, IndexSearcher searcher, int pageNow, int pageSize)
            throws IOException {
        TopDocs topDocs = searcher.search(query, pageNow*pageSize);
         System.out.println("查询到的总条数\t"+topDocs.totalHits);
         ScoreDoc [] alllScores = topDocs.scoreDocs;
 
         List<ScoreDoc> hitScores = new ArrayList<>();
         
         int start = (pageNow -1)*pageSize ;
         int end = pageSize*pageNow;
         for(int i=start;i<end;i++)
             hitScores.add(alllScores[i]);
         
         ScoreDoc[] hits = hitScores.toArray(new ScoreDoc[]{});
        return hits;
    }
     
    private static ScoreDoc[] pageSearch2(Query query, IndexSearcher searcher, int pageNow, int pageSize)
            throws IOException {
         
        int start = (pageNow - 1) * pageSize;
        if(0==start){
            TopDocs topDocs = searcher.search(query, pageNow*pageSize);
            return topDocs.scoreDocs;
        }
        // 查询数据， 结束页面自前的数据都会查询到，但是只取本页的数据
        TopDocs topDocs = searcher.search(query, start);
        //获取到上一页最后一条
         
        ScoreDoc preScore= topDocs.scoreDocs[start-1];
 
        //查询最后一条后的数据的一页数据
        topDocs = searcher.searchAfter(preScore, query, pageSize);
        return topDocs.scoreDocs;
         
    }
 
    private static void showSearchResults(IndexSearcher searcher, ScoreDoc[] hits, Query query, IKAnalyzer analyzer)throws Exception {
        System.out.println("找到 " + hits.length + " 个命中.");
 
        SimpleHTMLFormatter simpleHTMLFormatter = new SimpleHTMLFormatter("<span style='color:red'>", "</span>");
        Highlighter highlighter = new Highlighter(simpleHTMLFormatter, new QueryScorer(query));
 
        System.out.println("找到 " + hits.length + " 个命中.");
        System.out.println("序号\t匹配度得分\t结果");
        for (int i = 0; i < hits.length; ++i) {
            ScoreDoc scoreDoc= hits[i];
            int docId = scoreDoc.doc;
            Document d = searcher.doc(docId);
            List<IndexableField> fields= d.getFields();
            System.out.print((i + 1) );
            System.out.print("\t" + scoreDoc.score);
            for (IndexableField f : fields) {
 
                if("name".equals(f.name())){
                    TokenStream tokenStream = analyzer.tokenStream(f.name(), new StringReader(d.get(f.name())));
                    String fieldContent = highlighter.getBestFragment(tokenStream, d.get(f.name()));
                    System.out.print("\t"+fieldContent);
                }
                else{
                    System.out.print("\t"+d.get(f.name()));
                }
            }
            System.out.println("<br>");
        }
    }
 
    private static Directory createIndex(IKAnalyzer analyzer) throws IOException {
        Directory index = new RAMDirectory();
        IndexWriterConfig config = new IndexWriterConfig(analyzer);
        IndexWriter writer = new IndexWriter(index, config);
        String fileName = "140k_products.txt";
        List<Product> products = ProductUtil.file2list(fileName);
        int total = products.size();
        int count = 0;
        int per = 0;
        int oldPer =0;
        for (Product p : products) {
            addDoc(writer, p);
            count++;
            per = count*100/total;
            if(per!=oldPer){
                oldPer = per;
                System.out.printf("索引中，总共要添加 %d 条记录，当前添加进度是： %d%% %n",total,per);
            }
             
            if(per>10)
                break;
             
        }
        writer.close();
        return index;
    }
 
    private static void addDoc(IndexWriter w, Product p) throws IOException {
        Document doc = new Document();
        doc.add(new TextField("id", String.valueOf(p.getId()), Field.Store.YES));
        doc.add(new TextField("name", p.getName(), Field.Store.YES));
        doc.add(new TextField("category", p.getCategory(), Field.Store.YES));
        doc.add(new TextField("price", String.valueOf(p.getPrice()), Field.Store.YES));
        doc.add(new TextField("place", p.getPlace(), Field.Store.YES));
        doc.add(new TextField("code", p.getCode(), Field.Store.YES));
        w.addDocument(doc);
    }
}
 

文件下载：http://download.how2j.cn/1711/lucene.rar

posted on 2019-01-04 10:17 我是司阅读(492) 评论(0) 收藏举报

刷新页面返回顶部

四、分页查询

两种方式

第一种

第二种

导航

公告