1 package com.lucene.util;
2
3 import java.io.Reader;
4 import java.util.Set;
5
6 import org.apache.lucene.analysis.Analyzer;
7 import org.apache.lucene.analysis.LetterTokenizer;
8 import org.apache.lucene.analysis.LowerCaseFilter;
9 import org.apache.lucene.analysis.StopAnalyzer;
10 import org.apache.lucene.analysis.StopFilter;
11 import org.apache.lucene.analysis.TokenStream;
12 import org.apache.lucene.util.Version;
13
14 //定义禁用词分词器
15 public class UserDefinedAnalyzer extends Analyzer {
16
17 //定义禁用词集合
18 private Set stops;
19
20 //无参构造器使用默认的禁用词分词器
21 public UserDefinedAnalyzer(){
22 stops=StopAnalyzer.ENGLISH_STOP_WORDS_SET;
23 }
24
25 /**
26 * 传一个禁用词数组
27 * @param sws
28 */
29 public UserDefinedAnalyzer(String[] sws){
30 //使用stopFilter创建禁用词集合
31 stops=StopFilter.makeStopSet(Version.LUCENE_35,sws,true);
32 //将默认的禁用词添加进集合
33 stops.addAll(StopAnalyzer.ENGLISH_STOP_WORDS_SET);
34 }
35
36 /**
37 * 自定义分词器
38 */
39 @Override
40 public TokenStream tokenStream(String str, Reader reader) {
41
42 return new StopFilter
43 (Version.LUCENE_35,
44 new LowerCaseFilter
45 (Version.LUCENE_35,
46 new LetterTokenizer(
47 Version.LUCENE_35, reader)), stops);
48 }
49
50 }
/**
 * Runs a sample sentence through a {@code UserDefinedAnalyzer} configured with
 * two extra stop words and passes the result to
 * {@code AnalyzerUtil.displayToken} (presumably prints the surviving tokens;
 * verify against that helper).
 *
 * <p>To compare against the default stop words only, build the analyzer with
 * the no-argument constructor instead.
 */
@Test
public void test04() {
    final String[] extraStopWords = {"my", "name"};
    final String sentence = "my name is paul";

    final Analyzer analyzer = new UserDefinedAnalyzer(extraStopWords);
    AnalyzerUtil.displayToken(sentence, analyzer);
}