工程是一门科学,科学是严谨的。

Lucene.net 全文检索 盘古分词

lucene.net + 盘古分词

引用:

1.Lucene.Net.dll

2.PanGu.Lucene.Analyzer.dll 

3.PanGu.HighLight.dll

4.PanGu.dll

 1 using Lucene.Net.Search;
 2 using Lucene.Net.Store;
 3 using Lucene.Net.QueryParsers;
 4 using Lucene.Net.Documents;
 5 using Lucene.Net.Index;
 6 using Lucene.Net.Analysis.Standard;
 7 using Lucene.Net.Analysis;
 8 using Lucene.Net.Analysis.PanGu;
 9 using PanGu.HighLight;
10 using PanGu;

 

1.建立索引:

 1 static string path = @"G:\indextest";//索引文件储存位置
 2 
 3 static void CreateIndex()
 4         {
 5             //创建索引库目录
 6             var directory = FSDirectory.Open(new DirectoryInfo(path));
 7             Analyzer analyzer = null;
 8             //analyzer = new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_29);
 9 
10             if (isPangu)
11             {
12                 analyzer = new PanGuAnalyzer();//盘古Analyzer
13             }
14             else
15             {
16                 analyzer = new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_29);
17             }
18 
19             //创建一个索引,采用StandardAnalyzer对句子进行分词
20             IndexWriter indexWriter = new IndexWriter(directory, analyzer, true, IndexWriter.MaxFieldLength.LIMITED);
21             MySqlConnection conn = new MySqlConnection(@"server=localhost;User Id=root;password=123456;Database=ecshop");
22             conn.Open();
23             MySqlCommand cmd = new MySqlCommand("select goods_name,goods_brief from ecs_goods", conn);
24             MySqlDataReader reader = cmd.ExecuteReader();
25             while (reader.Read())
26             {
27                 //域的集合:文档,类似于表的行
28                 Document doc = new Document();
29                 //要索引的字段
30                 doc.Add(new Field("goods_name", reader["goods_name"].ToString(), Field.Store.YES, Field.Index.ANALYZED));
31                 doc.Add(new Field("goods_brief", reader["goods_brief"].ToString(), Field.Store.YES, Field.Index.ANALYZED));
32                 indexWriter.AddDocument(doc);
33             }
34             reader.Close();
35             //对索引文件进行优化
36             indexWriter.Optimize();
37             indexWriter.Close();
38         }

 

2.搜索:

 1      protected void Page_Load(object sender, EventArgs e)
 2         {
 3             keyword = Request.Form["q"];
 4             if (keyword != null && keyword != "")
 5             {
 6                 var watch = Stopwatch.StartNew();
 7                 Analyzer analyzer = null;
 8                 analyzer = new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_29);
 9 
10                 //搜索
11                 IndexSearcher searcher = new IndexSearcher(FSDirectory.Open(new DirectoryInfo(path)), true);
12 
13                 //查询表达式
14                 QueryParser queryP = new QueryParser(Lucene.Net.Util.Version.LUCENE_29, "goods_name", analyzer);
15 
16                 //query.parse:注入查询条件
17                 Query query = queryP.Parse(keyword);
18                 var hits = searcher.Search(query, 200);
19 
20                 //create highlighter
21                 //IFormatter formatter = new SimpleHTMLFormatter("<span style=\"font-weight:bold;color: red;\">", "</span>");
22                 //SimpleFragmenter fragmenter = new SimpleFragmenter(80);
23                 //var scorer = new QueryScorer(query);
24                 //Highlighter highlighter = new Highlighter(formatter, scorer);
25                 //highlighter.TextFragmenter = fragmenter;
26 
27                 //PanGu create highlighter
28                 PanGu.HighLight.SimpleHTMLFormatter simpleHTMLFormatter =
29                    new PanGu.HighLight.SimpleHTMLFormatter("<span style=\"font-weight:bold;color: red;\">", "</span>");
30                 PanGu.HighLight.Highlighter highlighter =
31                     new PanGu.HighLight.Highlighter(simpleHTMLFormatter,
32                     new Segment());
33                 highlighter.FragmentSize = 50;
34 
35                 for (int i = 0; i < hits.totalHits; i++)
36                 {
37                     Document doc = searcher.Doc(hits.scoreDocs[i].doc);
38                     //TokenStream stream = analyzer.TokenStream("goods_name", new StringReader(doc.Get("goods_name")));
39                     //String sample = highlighter.GetBestFragments(stream, doc.Get("goods_name"), 2, "...");
40                     goods g = new goods();
41                     g.goods_name = highlighter.GetBestFragment(keyword, doc.Get("goods_name"));
42                     g.goods_brief = highlighter.GetBestFragment(keyword, doc.Get("goods_brief"));
43                     gs.Add(g);
44                 }
45 
46                 watch.Stop();
47 
48                 tasktime = "搜索耗费时间:" + watch.ElapsedMilliseconds + "毫秒";
49             }
50         }

 多字段搜索

// Multi-field search: parse the keyword once against both the "Title" and
// "Content" fields and fetch up to 200 hits.
var fields = new[] { "Title", "Content" };
var mq = new MultiFieldQueryParser(
    Lucene.Net.Util.Version.LUCENE_29,
    fields,
    analyzer);
Query multiquery = mq.Parse(keyword);
TopDocs hits1 = searcher.Search(multiquery, 200);

 

posted @ 2013-07-16 18:11  大圣的笑  阅读(3704)  评论(10)  编辑  收藏  举报