package analysis;
import java.io.IOException;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.SimpleAnalyzer;
import org.apache.lucene.analysis.StopAnalyzer;
import org.apache.lucene.analysis.WhitespaceAnalyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.util.Version;
/**
 * Console demo that runs a fixed set of sample texts through several Lucene
 * analyzers and prints the tokens each analyzer produces.
 */
public class AnalyzerDemo {

    /** Sample texts to be analyzed. */
    private static final String[] SAMPLE_TEXTS = {
        "The quick brown fox jumped over the lazy dog",
        "XY&Z Corporation - xyz@example.com",
        "中华人名共和国1949年成立,从此中国人民酒陷入了水深火热之中,Fuck!"
    };

    /** Analyzers under test; each sample text is run through them in this order. */
    private static final Analyzer[] ANALYZERS = {
        new WhitespaceAnalyzer(),
        new SimpleAnalyzer(),
        new StopAnalyzer(Version.LUCENE_30),
        new StandardAnalyzer(Version.LUCENE_30)
    };

    /**
     * Runs the analysis demo over every sample text.
     *
     * @param args command-line arguments (not used)
     * @throws IOException if analyzing a text fails
     */
    public static void main(String[] args) throws IOException {
        for (int i = 0; i < SAMPLE_TEXTS.length; i++) {
            analyze(SAMPLE_TEXTS[i]);
        }
    }

    /**
     * Prints the given text, then one section per analyzer consisting of the
     * analyzer's simple class name followed by the tokens it produced.
     *
     * @param text the text to analyze
     * @throws IOException if token display fails
     */
    private static void analyze(String text) throws IOException {
        System.out.println("Analyzing \"" + text + "\"");
        for (int i = 0; i < ANALYZERS.length; i++) {
            Analyzer current = ANALYZERS[i];
            String heading = " " + current.getClass().getSimpleName() + ":";
            System.out.println(heading);
            System.out.print(" ");
            // Token output is delegated to the shared helper.
            AnalyzerUtils.displayTokens(current, text);
            // Newline plus println's own line break: ends the token line and leaves a blank line.
            System.out.println("\n");
        }
    }
}
package analysis;
import java.io.IOException;
import java.io.StringReader;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
/**
 * Helpers for printing the tokens produced by an analyzer or a token stream.
 */
public class AnalyzerUtils {

    /**
     * Analyzes {@code text} with {@code analyzer} (using the field name
     * {@code "contents"}) and prints each resulting term to standard output
     * in the form {@code [term] }.
     *
     * @param analyzer the analyzer used to create the token stream
     * @param text     the text to analyze
     * @throws IOException if reading from or closing the token stream fails
     */
    public static void displayTokens(Analyzer analyzer, String text)
            throws IOException {
        TokenStream stream = analyzer.tokenStream("contents", new StringReader(text));
        try {
            displayTokens(stream);
        } finally {
            // The stream was created here, so it is released here even if
            // consuming it throws.
            stream.close();
        }
    }

    /**
     * Consumes {@code stream} and prints each term to standard output in the
     * form {@code [term] }.
     *
     * <p>The stream is reset before it is read and ended afterwards, but it is
     * not closed: closing is left to whoever created it.
     *
     * @param stream the token stream to consume
     * @throws IOException if reading from the token stream fails
     */
    public static void displayTokens(TokenStream stream) throws IOException {
        TermAttribute term = stream.addAttribute(TermAttribute.class);
        // Follow the consumer workflow: reset, iterate, then end.
        stream.reset();
        while (stream.incrementToken()) {
            System.out.print("[" + term.term() + "] ");
        }
        stream.end();
    }
}
下载: Lucene_in_Action_2nd_Edition.rar