package com.weiquan.Lucene;
import java.io.IOException;
import java.nio.file.Paths;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryparser.classic.MultiFieldQueryParser;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.TopScoreDocCollector;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
/**
 * Demo driver: builds a small index of hard-coded documents with the
 * single-character {@link SingleAnalyzer}, then runs a boolean AND query
 * over the {@code isbn} and {@code text} fields and prints the hits.
 */
public class TxtFileIndexer {
    public static void main(String[] args) throws IOException, ParseException {
        Analyzer analyzer = new SingleAnalyzer();
        // Index lives on the local file system at this fixed path.
        Directory index = FSDirectory.open(Paths.get("E:/luceneIndex"));

        IndexWriterConfig config = new IndexWriterConfig(analyzer);
        // try-with-resources guarantees the writer is closed (and the index
        // committed) even if an addDoc call throws. The writer MUST be closed
        // before the reader below is opened, or the reader won't see the docs.
        try (IndexWriter w = new IndexWriter(index, config)) {
            addDoc(w, "Thee Art f Computer Science", "12");
            addDoc(w, "Thee Art f Computer Science", "23");
            addDoc(w, "Thee Art f Computer Science", "34");
            addDoc(w, "Thee Art f Computer Science", "45");
            addDoc(w, "Thee Art f Computer Science", "56");
        }

        // Conjunction: a document must match BOTH sub-queries to be a hit.
        BooleanQuery query = new BooleanQuery();
        PhraseQuery byIsbn = new PhraseQuery();
        byIsbn.add(new Term("isbn", "12"));
        PhraseQuery byText = new PhraseQuery();
        byText.add(new Term("text", "c"));
        query.add(byText, BooleanClause.Occur.MUST);
        query.add(byIsbn, BooleanClause.Occur.MUST);

        int hitsPerPage = 10;
        // The reader can only be closed once the documents are no longer needed;
        // try-with-resources scopes it to the search-and-print section.
        try (IndexReader reader = DirectoryReader.open(index)) {
            IndexSearcher searcher = new IndexSearcher(reader);
            TopScoreDocCollector collector = TopScoreDocCollector.create(hitsPerPage);
            searcher.search(query, collector);
            ScoreDoc[] hits = collector.topDocs().scoreDocs;

            System.out.println("total :" + collector.getTotalHits());
            System.out.println("Found " + hits.length + " hits.");
            for (int i = 0; i < hits.length; ++i) {
                int docId = hits[i].doc;
                System.out.println(hits[i].toString());
                Document d = searcher.doc(docId);
                System.out.println((i + 1) + ". " + d.get("text") + " . " + d.get("isbn"));
            }
        }
    }

    /**
     * Adds one document with a tokenized {@code text} field and an
     * untokenized {@code isbn} field.
     *
     * @param w     open writer to add the document to
     * @param title stored, analyzed body text
     * @param isbn  stored as a single token (StringField) so it matches exactly
     * @throws IOException if the writer fails to add the document
     */
    private static void addDoc(IndexWriter w, String title, String isbn)
            throws IOException {
        Document doc = new Document();
        doc.add(new TextField("text", title, Field.Store.YES));
        // StringField: we don't want the ISBN tokenized.
        doc.add(new StringField("isbn", isbn, Field.Store.YES));
        w.addDocument(doc);
    }
}
package com.weiquan.Lucene;
import java.io.IOException;
import java.io.Reader;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.util.AttributeFactory;
/**
 * Tokenizer that emits every letter or digit of the input as its own
 * single-character, lower-cased token; all other characters act as
 * separators and produce no token.
 */
public class SingleTokenizer extends Tokenizer {
    private int offset = 0, bufferIndex = 0, dataLen = 0;
    private static final int MAX_WORD_LEN = 255;
    private static final int IO_BUFFER_SIZE = 1024;
    // Token under construction. Because every accepted char is flushed
    // immediately, length never exceeds 1 in practice, so MAX_WORD_LEN is
    // never hit.
    private final char[] buffer = new char[MAX_WORD_LEN];
    private final char[] ioBuffer = new char[IO_BUFFER_SIZE];
    private int length = 0; // chars currently buffered for the pending token
    private int start = 0;  // start offset of the pending token in the input

    private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
    private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);

    /**
     * Publishes the buffered characters as the current token's term and
     * offsets.
     *
     * @return true if a token was emitted, false if nothing was buffered
     */
    private boolean flush() {
        if (length > 0) {
            termAtt.copyBuffer(buffer, 0, length);
            offsetAtt.setOffset(correctOffset(start),
                    correctOffset(start + length));
            return true;
        }
        return false;
    }

    /** Appends one lower-cased character to the token buffer. */
    private void push(char c) {
        if (length == 0) {
            start = offset - 1; // first char: record the token's start offset
        }
        buffer[length++] = Character.toLowerCase(c);
    }

    @Override
    public boolean incrementToken() throws IOException {
        clearAttributes();
        length = 0;
        start = offset;
        while (true) {
            final char c;
            offset++;
            if (bufferIndex >= dataLen) {
                // Refill the I/O buffer from the underlying Reader.
                dataLen = input.read(ioBuffer);
                bufferIndex = 0;
            }
            if (dataLen == -1) {
                // End of input: emit whatever is buffered, if anything.
                offset--;
                return flush();
            } else {
                c = ioBuffer[bufferIndex++];
            }
            switch (Character.getType(c)) {
                // Deliberate fall-through: digits and all letter categories
                // share one handler, so each such char becomes its own
                // single-character token (digits/letters are NOT filtered out).
                case Character.DECIMAL_DIGIT_NUMBER:
                case Character.LOWERCASE_LETTER:
                case Character.UPPERCASE_LETTER:
                case Character.OTHER_LETTER:
                    if (length > 0) {
                        // A token is already pending; push this char back so
                        // the next call re-reads it, and emit the pending one.
                        bufferIndex--;
                        offset--;
                        return flush();
                    }
                    push(c);
                    return flush();
                default:
                    if (length > 0) {
                        return flush();
                    }
                    break; // separator with nothing buffered: keep scanning
            }
        }
    }

    @Override
    public final void end() throws IOException {
        // The TokenStream contract requires overrides of end() to chain to
        // super.end(); the original omitted this, which leaves end-state
        // attributes (e.g. position increment) unset.
        super.end();
        final int finalOffset = correctOffset(offset);
        this.offsetAtt.setOffset(finalOffset, finalOffset);
    }

    @Override
    public void reset() throws IOException {
        super.reset();
        // length/start are reset at the top of incrementToken, so only the
        // read-position state needs clearing here.
        offset = bufferIndex = dataLen = 0;
    }
}
package com.weiquan.Lucene;
import java.util.Arrays;
import java.util.List;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.util.CharArraySet;
/**
 * Analyzer that splits text into single-character tokens via
 * {@link SingleTokenizer}.
 */
public class SingleAnalyzer extends Analyzer {
    // Intended to hold banned search terms (e.g. prohibited words).
    // NOTE(review): this set is built but never wired into the token stream —
    // createComponents applies no StopFilter — so it currently has no effect.
    // Confirm whether stop-word filtering is still wanted before removing.
    private final CharArraySet stops;

    public SingleAnalyzer() {
        List<String> stopWords = Arrays.asList(); // no stop words configured yet
        CharArraySet stopSet = new CharArraySet(stopWords, false);
        stops = CharArraySet.unmodifiableSet(stopSet);
    }

    @Override
    protected TokenStreamComponents createComponents(String fieldName) {
        Tokenizer tokenizer = new SingleTokenizer();
        return new TokenStreamComponents(tokenizer);
    }
}