五、索引删除和更新
索引删除和更新
索引建立好了之后,还是需要维护的,比如新增、删除和更新。新增就是建立索引的过程,这里就不表了,本教程主要讲索引的删除和更新。
索引里的数据,其实就是一个一个的 Document 对象,那么本文就是介绍如何删除和更新这些 Document 对象。
索引里的数据,其实就是一个一个的 Document 对象,那么本文就是介绍如何删除和更新这些 Document 对象。
删除索引
删除id=51173的Document之后,如图所示,再搜索鞭字,就查询不到结果了。
删除关键代码如下,通过 Term对象删除
删除关键代码如下,通过 Term对象删除
// Delete the document whose "id" term is 51173.
IndexWriterConfig config = new IndexWriterConfig(analyzer);
// try-with-resources closes the writer even if the delete or the commit throws.
try (IndexWriter indexWriter = new IndexWriter(index, config)) {
    indexWriter.deleteDocuments(new Term("id", "51173"));
    indexWriter.commit();
}
|
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
|
package com.how2java;

import java.io.IOException;
import java.io.StringReader;
import java.util.List;
import java.util.Scanner;

import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;
import org.wltea.analyzer.lucene.IKAnalyzer;

/**
 * Demo: builds an in-memory index from the product file, deletes the document
 * with id 51173, then answers keyword queries typed on standard input.
 */
public class TestLucene {

    /**
     * Builds the index, deletes one document by term, and runs the interactive
     * query loop until standard input is exhausted.
     *
     * @param args unused
     * @throws Exception if indexing, deleting, query parsing or searching fails
     */
    public static void main(String[] args) throws Exception {
        // 1. Prepare the Chinese analyzer
        IKAnalyzer analyzer = new IKAnalyzer();

        // 2. Build the index
        Directory index = createIndex(analyzer);

        // Delete the document whose "id" term is 51173.
        // try-with-resources closes the writer even if the delete or commit throws.
        IndexWriterConfig config = new IndexWriterConfig(analyzer);
        try (IndexWriter indexWriter = new IndexWriter(index, config)) {
            indexWriter.deleteDocuments(new Term("id", "51173"));
            indexWriter.commit();
        }

        // 3. Query loop
        Scanner s = new Scanner(System.in);
        while (true) {
            System.out.print("请输入查询关键字:");
            // Stop cleanly at end of input instead of failing in nextLine().
            if (!s.hasNextLine()) {
                break;
            }
            String keyword = s.nextLine();
            System.out.println("当前关键字是:" + keyword);
            Query query = new QueryParser("name", analyzer).parse(keyword);

            // 4. Search; the reader is closed even when searching or printing throws
            try (IndexReader reader = DirectoryReader.open(index)) {
                IndexSearcher searcher = new IndexSearcher(reader);
                int numberPerPage = 10;
                ScoreDoc[] hits = searcher.search(query, numberPerPage).scoreDocs;

                // 5. Show the results
                showSearchResults(searcher, hits, query, analyzer);
            }
        }
    }

    /**
     * Prints the hit count, a header row, and one row per hit containing the
     * rank, the score and every stored field; the "name" field is printed with
     * the matched terms wrapped in a red span.
     *
     * @param searcher searcher used to load the stored documents
     * @param hits     hits to print
     * @param query    query whose terms are highlighted
     * @param analyzer analyzer used to tokenize the "name" field for highlighting
     * @throws Exception if loading a document or highlighting fails
     */
    private static void showSearchResults(IndexSearcher searcher, ScoreDoc[] hits, Query query,
            IKAnalyzer analyzer) throws Exception {
        SimpleHTMLFormatter simpleHTMLFormatter =
                new SimpleHTMLFormatter("<span style='color:red'>", "</span>");
        Highlighter highlighter = new Highlighter(simpleHTMLFormatter, new QueryScorer(query));

        // The hit count is printed once (the original printed this line twice).
        System.out.println("找到 " + hits.length + " 个命中.");
        System.out.println("序号\t匹配度得分\t结果");
        for (int i = 0; i < hits.length; ++i) {
            ScoreDoc scoreDoc = hits[i];
            int docId = scoreDoc.doc;
            Document d = searcher.doc(docId);
            List<IndexableField> fields = d.getFields();
            System.out.print((i + 1));
            System.out.print("\t" + scoreDoc.score);
            for (IndexableField f : fields) {
                String value = d.get(f.name());
                if ("name".equals(f.name())) {
                    TokenStream tokenStream = analyzer.tokenStream(f.name(), new StringReader(value));
                    String fieldContent = highlighter.getBestFragment(tokenStream, value);
                    // Fall back to the raw text so a missing fragment is not printed as "null".
                    System.out.print("\t" + (fieldContent != null ? fieldContent : value));
                } else {
                    System.out.print("\t" + value);
                }
            }
            System.out.println("<br>");
        }
    }

    /**
     * Loads the products from "140k_products.txt" and adds one document per
     * product to a new in-memory directory, printing the progress each time the
     * completed percentage changes.
     *
     * @param analyzer analyzer used by the index writer
     * @return the directory holding the finished index
     * @throws IOException if writing the index fails
     */
    private static Directory createIndex(IKAnalyzer analyzer) throws IOException {
        Directory index = new RAMDirectory();
        IndexWriterConfig config = new IndexWriterConfig(analyzer);
        // try-with-resources closes the writer even if loading or adding a product throws.
        try (IndexWriter writer = new IndexWriter(index, config)) {
            String fileName = "140k_products.txt";
            List<Product> products = ProductUtil.file2list(fileName);
            int total = products.size();
            int count = 0;
            int per = 0;
            int oldPer = 0;
            for (Product p : products) {
                addDoc(writer, p);
                count++;
                per = count * 100 / total;
                if (per != oldPer) {
                    oldPer = per;
                    System.out.printf("索引中,总共要添加 %d 条记录,当前添加进度是: %d%% %n", total, per);
                }
            }
        }
        return index;
    }

    /**
     * Adds one product to the index as a document with the stored text fields
     * id, name, category, price, place and code.
     *
     * @param w writer that receives the document
     * @param p product to index
     * @throws IOException if the writer fails to add the document
     */
    private static void addDoc(IndexWriter w, Product p) throws IOException {
        Document doc = new Document();
        doc.add(new TextField("id", String.valueOf(p.getId()), Field.Store.YES));
        doc.add(new TextField("name", p.getName(), Field.Store.YES));
        doc.add(new TextField("category", p.getCategory(), Field.Store.YES));
        doc.add(new TextField("price", String.valueOf(p.getPrice()), Field.Store.YES));
        doc.add(new TextField("place", p.getPlace(), Field.Store.YES));
        doc.add(new TextField("code", p.getCode(), Field.Store.YES));
        w.addDocument(doc);
    }
}
更多删除
还可以按照如下方法来删除索引,API 很明显,就不做代码示例了
下载地址:http://download.how2j.cn/1712/lucene.rar
deleteDocuments(Query query):根据Query条件来删除单个或多个Document
deleteDocuments(Query[] queries):根据Query条件来删除单个或多个Document
deleteDocuments(Term term):根据Term来删除单个或多个Document
deleteDocuments(Term[] terms):根据Term来删除单个或多个Document
deleteAll():删除所有的Document
更新索引
如图所示,更新索引后,再用鞭查询,得到的结果是查出了更新之后的数据。 更新的关键代码:
// Update the index: replace the document whose "id" term is 51173.
IndexWriterConfig config = new IndexWriterConfig(analyzer);
Document doc = new Document();
doc.add(new TextField("id", "51173", Field.Store.YES));
doc.add(new TextField("name", "神鞭,鞭没了,神还在", Field.Store.YES));
doc.add(new TextField("category", "道具", Field.Store.YES));
doc.add(new TextField("price", "998", Field.Store.YES));
doc.add(new TextField("place", "南海群岛", Field.Store.YES));
doc.add(new TextField("code", "888888", Field.Store.YES));
// try-with-resources closes the writer even if the update or the commit throws.
try (IndexWriter indexWriter = new IndexWriter(index, config)) {
    indexWriter.updateDocument(new Term("id", "51173"), doc);
    indexWriter.commit();
}
|
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
|
package com.how2java;

import java.io.IOException;
import java.io.StringReader;
import java.util.List;
import java.util.Scanner;

import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;
import org.wltea.analyzer.lucene.IKAnalyzer;

/**
 * Demo: builds an in-memory index from the product file, replaces the document
 * with id 51173 by a new one, then answers keyword queries typed on standard input.
 */
public class TestLucene {

    /**
     * Builds the index, updates one document by term, and runs the interactive
     * query loop until standard input is exhausted.
     *
     * @param args unused
     * @throws Exception if indexing, updating, query parsing or searching fails
     */
    public static void main(String[] args) throws Exception {
        // 1. Prepare the Chinese analyzer
        IKAnalyzer analyzer = new IKAnalyzer();

        // 2. Build the index
        Directory index = createIndex(analyzer);

        // Update the index: replace the document whose "id" term is 51173.
        IndexWriterConfig config = new IndexWriterConfig(analyzer);
        Document doc = new Document();
        doc.add(new TextField("id", "51173", Field.Store.YES));
        doc.add(new TextField("name", "神鞭,鞭没了,神还在", Field.Store.YES));
        doc.add(new TextField("category", "道具", Field.Store.YES));
        doc.add(new TextField("price", "998", Field.Store.YES));
        doc.add(new TextField("place", "南海群岛", Field.Store.YES));
        doc.add(new TextField("code", "888888", Field.Store.YES));
        // try-with-resources closes the writer even if the update or commit throws.
        try (IndexWriter indexWriter = new IndexWriter(index, config)) {
            indexWriter.updateDocument(new Term("id", "51173"), doc);
            indexWriter.commit();
        }

        // 3. Query loop
        Scanner s = new Scanner(System.in);
        while (true) {
            System.out.print("请输入查询关键字:");
            // Stop cleanly at end of input instead of failing in nextLine().
            if (!s.hasNextLine()) {
                break;
            }
            String keyword = s.nextLine();
            System.out.println("当前关键字是:" + keyword);
            Query query = new QueryParser("name", analyzer).parse(keyword);

            // 4. Search; the reader is closed even when searching or printing throws
            try (IndexReader reader = DirectoryReader.open(index)) {
                IndexSearcher searcher = new IndexSearcher(reader);
                int numberPerPage = 10;
                ScoreDoc[] hits = searcher.search(query, numberPerPage).scoreDocs;

                // 5. Show the results
                showSearchResults(searcher, hits, query, analyzer);
            }
        }
    }

    /**
     * Prints the hit count, a header row, and one row per hit containing the
     * rank, the score and every stored field; the "name" field is printed with
     * the matched terms wrapped in a red span.
     *
     * @param searcher searcher used to load the stored documents
     * @param hits     hits to print
     * @param query    query whose terms are highlighted
     * @param analyzer analyzer used to tokenize the "name" field for highlighting
     * @throws Exception if loading a document or highlighting fails
     */
    private static void showSearchResults(IndexSearcher searcher, ScoreDoc[] hits, Query query,
            IKAnalyzer analyzer) throws Exception {
        SimpleHTMLFormatter simpleHTMLFormatter =
                new SimpleHTMLFormatter("<span style='color:red'>", "</span>");
        Highlighter highlighter = new Highlighter(simpleHTMLFormatter, new QueryScorer(query));

        // The hit count is printed once (the original printed this line twice).
        System.out.println("找到 " + hits.length + " 个命中.");
        System.out.println("序号\t匹配度得分\t结果");
        for (int i = 0; i < hits.length; ++i) {
            ScoreDoc scoreDoc = hits[i];
            int docId = scoreDoc.doc;
            Document d = searcher.doc(docId);
            List<IndexableField> fields = d.getFields();
            System.out.print((i + 1));
            System.out.print("\t" + scoreDoc.score);
            for (IndexableField f : fields) {
                String value = d.get(f.name());
                if ("name".equals(f.name())) {
                    TokenStream tokenStream = analyzer.tokenStream(f.name(), new StringReader(value));
                    String fieldContent = highlighter.getBestFragment(tokenStream, value);
                    // Fall back to the raw text so a missing fragment is not printed as "null".
                    System.out.print("\t" + (fieldContent != null ? fieldContent : value));
                } else {
                    System.out.print("\t" + value);
                }
            }
            System.out.println("<br>");
        }
    }

    /**
     * Loads the products from "140k_products.txt" and adds one document per
     * product to a new in-memory directory, printing the progress each time the
     * completed percentage changes.
     *
     * @param analyzer analyzer used by the index writer
     * @return the directory holding the finished index
     * @throws IOException if writing the index fails
     */
    private static Directory createIndex(IKAnalyzer analyzer) throws IOException {
        Directory index = new RAMDirectory();
        IndexWriterConfig config = new IndexWriterConfig(analyzer);
        // try-with-resources closes the writer even if loading or adding a product throws.
        try (IndexWriter writer = new IndexWriter(index, config)) {
            String fileName = "140k_products.txt";
            List<Product> products = ProductUtil.file2list(fileName);
            int total = products.size();
            int count = 0;
            int per = 0;
            int oldPer = 0;
            for (Product p : products) {
                addDoc(writer, p);
                count++;
                per = count * 100 / total;
                if (per != oldPer) {
                    oldPer = per;
                    System.out.printf("索引中,总共要添加 %d 条记录,当前添加进度是: %d%% %n", total, per);
                }
            }
        }
        return index;
    }

    /**
     * Adds one product to the index as a document with the stored text fields
     * id, name, category, price, place and code.
     *
     * @param w writer that receives the document
     * @param p product to index
     * @throws IOException if the writer fails to add the document
     */
    private static void addDoc(IndexWriter w, Product p) throws IOException {
        Document doc = new Document();
        doc.add(new TextField("id", String.valueOf(p.getId()), Field.Store.YES));
        doc.add(new TextField("name", p.getName(), Field.Store.YES));
        doc.add(new TextField("category", p.getCategory(), Field.Store.YES));
        doc.add(new TextField("price", String.valueOf(p.getPrice()), Field.Store.YES));
        doc.add(new TextField("place", p.getPlace(), Field.Store.YES));
        doc.add(new TextField("code", p.getCode(), Field.Store.YES));
        w.addDocument(doc);
    }
}
lucene : https://lucene.apache.org/
浙公网安备 33010602011771号