成功为99收藏夹实现基于Lucene的站内全文搜索
99收藏夹原来的站内收藏搜索是基于数据库的LIKE '%关键字%'语句实现的,发现速度有点慢,于是想到用Lucene来实现按收藏者名字和共享收藏的链接标题进行搜索的功能,昨天下午做好了,但是结果还不太令人满意。
首先我用Lucene把数据从数据库导出建立索引,代码如下,注:里面的ChinaTokene方法是调用www.sj110.com的中文分词组件
![]()
1
public class IntranetIndexer
2
{
3
private IndexWriter writer;
4
/// <summary>
/// Opens an <c>IndexWriter</c> (with a <c>WhitespaceAnalyzer</c>) over the index
/// directory <paramref name="dictory"/>. The writer is asked to create a new index
/// when the directory contains no files, and to open the existing one otherwise.
/// </summary>
/// <param name="dictory">Path of the directory that holds the index files.</param>
public IntranetIndexer(string dictory)
{
    // An empty directory means no index has been built there yet.
    bool createNew = Directory.GetFiles(dictory).Length == 0;

    writer = new IndexWriter(dictory, new WhitespaceAnalyzer(), createNew);
    writer.SetUseCompoundFile(true);
}
16
/// <summary>
/// Adds one index document per row of <paramref name="dr"/> and then writes the
/// <c>this_id</c> of the last row read to <c>endid.txt</c>.
/// When the reader has no rows, nothing is indexed and the file is left untouched.
/// </summary>
/// <param name="dr">
/// Reader exposing the columns <c>this_id</c>, <c>this_title</c>, <c>this_url</c>
/// and <c>this_name</c>.
/// </param>
public void AddDataReaderToIndex(SqlDataReader dr)
{
    if (!dr.HasRows)
    {
        return;
    }

    string lastId = "0";
    while (dr.Read())
    {
        string title = dr["this_title"].ToString();

        Document entry = new Document();
        entry.Add(Field.Keyword("this_url", dr["this_url"].ToString()));
        entry.Add(Field.Keyword("this_name", dr["this_name"].ToString()));
        // The searchable title is pre-segmented; the raw title is kept in all_title.
        entry.Add(Field.Text("this_title", ChinaTokene(title)));
        entry.Add(Field.UnIndexed("all_title", title));
        writer.AddDocument(entry);

        lastId = dr["this_id"].ToString();
    }

    // Remember where this run stopped so the next run can continue from there.
    using (StreamWriter sw = new StreamWriter(@"C:\Inetpub\wwwroot\pwqdream\endid.txt"))
    {
        sw.WriteLine(lastId);
    }
}
45![]()
46
/// <summary>
/// Segments <paramref name="s"/> with the Sj110 Chinese tokenizer and returns the
/// tokens joined by single spaces, so that a whitespace-based analyzer can split
/// them again.
/// </summary>
/// <param name="s">Text to segment.</param>
/// <returns>
/// The space-separated tokens, or an empty string when the tokenizer yields none.
/// </returns>
/// <remarks>
/// The previous implementation appended "token " for every token and then called
/// <c>Remove(Length - 2, 1)</c>, which deleted the last character of the last token
/// rather than the trailing space, and threw when there were no tokens.
/// </remarks>
private string ChinaTokene(string s)
{
    List<string> results = Sj110.Com.Chinese.Tokenizer.Tokenize(s);
    if (results == null || results.Count == 0)
    {
        return string.Empty;
    }

    return string.Join(" ", results.ToArray());
}
57
/// <summary>
/// Optimizes the index and then closes the underlying writer.
/// </summary>
public void Close()
{
    // Optimize has to run while the writer is still open.
    writer.Optimize();
    writer.Close();
}
当然,最后要把数据库中最后一条记录的ID写入文本文件,
因为前面获取DataReader的时候,我们首先是根据这个文本文件取得上次最后一条记录的ID,再进行查询的。
代码如下
![]()
1
/// <summary>
/// Click handler that incrementally indexes new rows of <c>userFav</c>.
/// It reads the id stored in <c>endid.txt</c>, selects every row whose
/// <c>this_id</c> is greater than that id, and passes the reader to an
/// <c>IntranetIndexer</c>.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
protected void buttonAddIndex_Click(object sender, EventArgs e)
{
    const string endIdPath = @"C:\Inetpub\wwwroot\pwqdream\endid.txt";

    // A missing, empty, malformed or negative marker means "index from the start".
    // The file does not exist until the first successful indexing run has written it.
    int endId = 0;
    if (System.IO.File.Exists(endIdPath))
    {
        using (StreamReader sr = new StreamReader(endIdPath))
        {
            if (!int.TryParse(sr.ReadLine(), out endId) || endId < 0)
            {
                endId = 0;
            }
        }
    }

    string connStr = ConfigurationManager.AppSettings[0].ToString();
    string selStr = "select this_id,this_title,this_url,this_name from userFav where this_id>@this_id";

    // using blocks release the connection, command and reader even if indexing throws.
    using (SqlConnection conn = new SqlConnection(connStr))
    using (SqlCommand comm = new SqlCommand(selStr, conn))
    {
        comm.Parameters.AddWithValue("@this_id", endId);
        conn.Open();

        using (SqlDataReader dr = comm.ExecuteReader())
        {
            IntranetIndexer writer = new IntranetIndexer(@"C:\Inetpub\wwwroot\pwqdream\index");
            try
            {
                writer.AddDataReaderToIndex(dr);
            }
            finally
            {
                // Always close the index writer, even after a failed run.
                writer.Close();
            }
        }
    }
}
查询的时候用的是ajax调用后台方法,多字段搜索,且都先分词了,结果很牵强:如果输入的是名字,那么取不到url和标题,如果输入的像标题,则取不到url和名字,还有界面很不友好,下面是代码:
![]()
1
/**
 * Starts a search of the shared favorites.
 * Shows the search term in the "titleTd" element, displays the loading indicator
 * and calls the server-side SelectUserShellFav method; the result is handled by
 * onSelectUserShellFavCom. Does nothing when the term is empty.
 *
 * @param obj        Search term typed by the user.
 * @param pageNumber Page of results to request.
 */
function showUserShellFav(obj,pageNumber)
{
    // The term comes straight from user input, so it must be HTML-escaped
    // before being written through innerHTML.
    function escapeHtml(text)
    {
        return String(text)
            .replace(/&/g, "&amp;")
            .replace(/</g, "&lt;")
            .replace(/>/g, "&gt;")
            .replace(/"/g, "&quot;")
            .replace(/'/g, "&#39;");
    }

    var selValue = document.getElementById("searchSel").value;
    if (obj != "")
    {
        var t = document.getElementById("titleTd");
        t.innerHTML = "搜索关于--<font color='red'>" + escapeHtml(obj) + "</font>--的共享收藏";
        showLoad();
        // The raw (unescaped) term is what gets sent to the server.
        PwqzcDream.test.SelectUserShellFav(obj, pageNumber, selValue, onSelectUserShellFavCom);
    }
}
14
/**
 * Callback for SelectUserShellFav: hides the loading indicator and renders the
 * pager (Tables[1], column "html") into "pageIndexTd" and the matching favorites
 * (Tables[0], columns this_url / this_title / this_name) into "contentTd".
 * Logged-in users get draggable links opened through openLink; anonymous users
 * get plain links that open in a new window.
 *
 * @param rel Response object whose "value" holds the two result tables.
 */
function onSelectUserShellFavCom(rel)
{
    // url, title and name are user-supplied bookmark data shown to other users,
    // so they are HTML-escaped before being concatenated into markup.
    // NOTE(review): if the server ever returns highlight markup inside this_title,
    // it will now be displayed as text - confirm against the server method.
    function escapeHtml(text)
    {
        return String(text)
            .replace(/&/g, "&amp;")
            .replace(/</g, "&lt;")
            .replace(/>/g, "&gt;")
            .replace(/"/g, "&quot;")
            .replace(/'/g, "&#39;");
    }

    hideLoad();
    var pageIndexTd = document.getElementById("pageIndexTd");
    var c = document.getElementById("contentTd");

    // A response without a value (e.g. a failed call) is treated as "no results"
    // instead of raising a TypeError.
    if (!rel || !rel.value)
    {
        pageIndexTd.innerHTML = "";
        c.innerHTML = "对不起,没有找到相关的收藏!";
        return;
    }

    var dtContent = rel.value.Tables[0];
    var dtCount = rel.value.Tables[1];

    // Pager markup is generated by the server and inserted as-is. It is built in
    // one string and assigned once, so the DOM is not re-parsed on every row.
    var pagerHtml = [];
    for (var r = 0; r < dtCount.Rows.length; r++)
    {
        pagerHtml.push(dtCount.Rows[r].html);
    }
    pageIndexTd.innerHTML = pagerHtml.join("");

    if (!(dtContent.Rows.length > 0))
    {
        c.innerHTML = "对不起,没有找到相关的收藏!";
        return;
    }

    // Synchronous server call; evaluated once rather than once per row.
    var loggedIn = PwqzcDream.MyDefault.IsLogin().value;
    var items = [];
    for (var row = 0; row < dtContent.Rows.length; row++)
    {
        var cur = dtContent.Rows[row];
        var url = escapeHtml(cur.this_url);
        var title = escapeHtml(cur.this_title);
        var name = escapeHtml(cur.this_name);

        if (loggedIn)
        {
            // The url is carried in the "type" attribute and opened via openLink.
            items.push("<li><a type='" + url + "' style='color:#0088e4;position:relative;cursor:pointer;' onmousedown='MouseDownToMove(this,event);' onmousemove='MouseMoveToMove(event);' onmouseup='MouseUpToMove(event);' onclick='openLink(this.type);'>" + title + "</a>  <font color='red'>" + name + "</font>收藏</li>");
        }
        else
        {
            items.push("<li><a href='" + url + "' style='color:#0088e4;' target='_blank'>" + title + "</a>  <font color='red'>" + name + "</font>收藏</li>");
        }
    }
    c.innerHTML = items.join("");
}
下面是按照用户名字搜索结果的抓图,当然,速度提升是很明显的
![]()
下面是按照标题搜索的结果的抓图:
![]()
同时也希望大家能够喜欢99收藏夹,谢谢大家!
首先我用Lucene把数据从数据库导出建立索引,代码如下,注:里面的ChinaTokene方法是调用www.sj110.com的中文分词组件
1
public class IntranetIndexer2
{3
private IndexWriter writer;4
/// <summary>
/// Opens an <c>IndexWriter</c> (with a <c>WhitespaceAnalyzer</c>) over the index
/// directory <paramref name="dictory"/>. The writer is asked to create a new index
/// when the directory contains no files, and to open the existing one otherwise.
/// </summary>
/// <param name="dictory">Path of the directory that holds the index files.</param>
public IntranetIndexer(string dictory)
{
    // An empty directory means no index has been built there yet.
    bool createNew = Directory.GetFiles(dictory).Length == 0;

    writer = new IndexWriter(dictory, new WhitespaceAnalyzer(), createNew);
    writer.SetUseCompoundFile(true);
}
/// <summary>
/// Adds one index document per row of <paramref name="dr"/> and then writes the
/// <c>this_id</c> of the last row read to <c>endid.txt</c>.
/// When the reader has no rows, nothing is indexed and the file is left untouched.
/// </summary>
/// <param name="dr">
/// Reader exposing the columns <c>this_id</c>, <c>this_title</c>, <c>this_url</c>
/// and <c>this_name</c>.
/// </param>
public void AddDataReaderToIndex(SqlDataReader dr)
{
    if (!dr.HasRows)
    {
        return;
    }

    string lastId = "0";
    while (dr.Read())
    {
        string title = dr["this_title"].ToString();

        Document entry = new Document();
        entry.Add(Field.Keyword("this_url", dr["this_url"].ToString()));
        entry.Add(Field.Keyword("this_name", dr["this_name"].ToString()));
        // The searchable title is pre-segmented; the raw title is kept in all_title.
        entry.Add(Field.Text("this_title", ChinaTokene(title)));
        entry.Add(Field.UnIndexed("all_title", title));
        writer.AddDocument(entry);

        lastId = dr["this_id"].ToString();
    }

    // Remember where this run stopped so the next run can continue from there.
    using (StreamWriter sw = new StreamWriter(@"C:\Inetpub\wwwroot\pwqdream\endid.txt"))
    {
        sw.WriteLine(lastId);
    }
}

46
/// <summary>
/// Segments <paramref name="s"/> with the Sj110 Chinese tokenizer and returns the
/// tokens joined by single spaces, so that a whitespace-based analyzer can split
/// them again.
/// </summary>
/// <param name="s">Text to segment.</param>
/// <returns>
/// The space-separated tokens, or an empty string when the tokenizer yields none.
/// </returns>
/// <remarks>
/// The previous implementation appended "token " for every token and then called
/// <c>Remove(Length - 2, 1)</c>, which deleted the last character of the last token
/// rather than the trailing space, and threw when there were no tokens.
/// </remarks>
private string ChinaTokene(string s)
{
    List<string> results = Sj110.Com.Chinese.Tokenizer.Tokenize(s);
    if (results == null || results.Count == 0)
    {
        return string.Empty;
    }

    return string.Join(" ", results.ToArray());
}
/// <summary>
/// Optimizes the index and then closes the underlying writer.
/// </summary>
public void Close()
{
    // Optimize has to run while the writer is still open.
    writer.Optimize();
    writer.Close();
}
当然,最后面要把最后的数据库的那条记录的ID写入文本文件
因为前面获取DataReader的时候,我们首先是根据这个文本文件取得上次最后一条记录的ID,再进行查询的。
代码如下
1
/// <summary>
/// Click handler that incrementally indexes new rows of <c>userFav</c>.
/// It reads the id stored in <c>endid.txt</c>, selects every row whose
/// <c>this_id</c> is greater than that id, and passes the reader to an
/// <c>IntranetIndexer</c>.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
protected void buttonAddIndex_Click(object sender, EventArgs e)
{
    const string endIdPath = @"C:\Inetpub\wwwroot\pwqdream\endid.txt";

    // A missing, empty, malformed or negative marker means "index from the start".
    // The file does not exist until the first successful indexing run has written it.
    int endId = 0;
    if (System.IO.File.Exists(endIdPath))
    {
        using (StreamReader sr = new StreamReader(endIdPath))
        {
            if (!int.TryParse(sr.ReadLine(), out endId) || endId < 0)
            {
                endId = 0;
            }
        }
    }

    string connStr = ConfigurationManager.AppSettings[0].ToString();
    string selStr = "select this_id,this_title,this_url,this_name from userFav where this_id>@this_id";

    // using blocks release the connection, command and reader even if indexing throws.
    using (SqlConnection conn = new SqlConnection(connStr))
    using (SqlCommand comm = new SqlCommand(selStr, conn))
    {
        comm.Parameters.AddWithValue("@this_id", endId);
        conn.Open();

        using (SqlDataReader dr = comm.ExecuteReader())
        {
            IntranetIndexer writer = new IntranetIndexer(@"C:\Inetpub\wwwroot\pwqdream\index");
            try
            {
                writer.AddDataReaderToIndex(dr);
            }
            finally
            {
                // Always close the index writer, even after a failed run.
                writer.Close();
            }
        }
    }
}
查询的时候用的是ajax调用后台方法,多字段搜索,且都先分词了,结果很牵强,如果输入的是名字,那么取不到url和url,如果输入的象标题,则取不到url和名字,还有界面很不友好,下面是代码:
1
/**
 * Starts a search of the shared favorites.
 * Shows the search term in the "titleTd" element, displays the loading indicator
 * and calls the server-side SelectUserShellFav method; the result is handled by
 * onSelectUserShellFavCom. Does nothing when the term is empty.
 *
 * @param obj        Search term typed by the user.
 * @param pageNumber Page of results to request.
 */
function showUserShellFav(obj,pageNumber)
{
    // The term comes straight from user input, so it must be HTML-escaped
    // before being written through innerHTML.
    function escapeHtml(text)
    {
        return String(text)
            .replace(/&/g, "&amp;")
            .replace(/</g, "&lt;")
            .replace(/>/g, "&gt;")
            .replace(/"/g, "&quot;")
            .replace(/'/g, "&#39;");
    }

    var selValue = document.getElementById("searchSel").value;
    if (obj != "")
    {
        var t = document.getElementById("titleTd");
        t.innerHTML = "搜索关于--<font color='red'>" + escapeHtml(obj) + "</font>--的共享收藏";
        showLoad();
        // The raw (unescaped) term is what gets sent to the server.
        PwqzcDream.test.SelectUserShellFav(obj, pageNumber, selValue, onSelectUserShellFavCom);
    }
}
/**
 * Callback for SelectUserShellFav: hides the loading indicator and renders the
 * pager (Tables[1], column "html") into "pageIndexTd" and the matching favorites
 * (Tables[0], columns this_url / this_title / this_name) into "contentTd".
 * Logged-in users get draggable links opened through openLink; anonymous users
 * get plain links that open in a new window.
 *
 * @param rel Response object whose "value" holds the two result tables.
 */
function onSelectUserShellFavCom(rel)
{
    // url, title and name are user-supplied bookmark data shown to other users,
    // so they are HTML-escaped before being concatenated into markup.
    // NOTE(review): if the server ever returns highlight markup inside this_title,
    // it will now be displayed as text - confirm against the server method.
    function escapeHtml(text)
    {
        return String(text)
            .replace(/&/g, "&amp;")
            .replace(/</g, "&lt;")
            .replace(/>/g, "&gt;")
            .replace(/"/g, "&quot;")
            .replace(/'/g, "&#39;");
    }

    hideLoad();
    var pageIndexTd = document.getElementById("pageIndexTd");
    var c = document.getElementById("contentTd");

    // A response without a value (e.g. a failed call) is treated as "no results"
    // instead of raising a TypeError.
    if (!rel || !rel.value)
    {
        pageIndexTd.innerHTML = "";
        c.innerHTML = "对不起,没有找到相关的收藏!";
        return;
    }

    var dtContent = rel.value.Tables[0];
    var dtCount = rel.value.Tables[1];

    // Pager markup is generated by the server and inserted as-is. It is built in
    // one string and assigned once, so the DOM is not re-parsed on every row.
    var pagerHtml = [];
    for (var r = 0; r < dtCount.Rows.length; r++)
    {
        pagerHtml.push(dtCount.Rows[r].html);
    }
    pageIndexTd.innerHTML = pagerHtml.join("");

    if (!(dtContent.Rows.length > 0))
    {
        c.innerHTML = "对不起,没有找到相关的收藏!";
        return;
    }

    // Synchronous server call; evaluated once rather than once per row.
    var loggedIn = PwqzcDream.MyDefault.IsLogin().value;
    var items = [];
    for (var row = 0; row < dtContent.Rows.length; row++)
    {
        var cur = dtContent.Rows[row];
        var url = escapeHtml(cur.this_url);
        var title = escapeHtml(cur.this_title);
        var name = escapeHtml(cur.this_name);

        if (loggedIn)
        {
            // The url is carried in the "type" attribute and opened via openLink.
            items.push("<li><a type='" + url + "' style='color:#0088e4;position:relative;cursor:pointer;' onmousedown='MouseDownToMove(this,event);' onmousemove='MouseMoveToMove(event);' onmouseup='MouseUpToMove(event);' onclick='openLink(this.type);'>" + title + "</a>  <font color='red'>" + name + "</font>收藏</li>");
        }
        else
        {
            items.push("<li><a href='" + url + "' style='color:#0088e4;' target='_blank'>" + title + "</a>  <font color='red'>" + name + "</font>收藏</li>");
        }
    }
    c.innerHTML = items.join("");
}
下面是按照用户名字搜索结果的抓图,当然,速度提升是很明显的

下面是按照标题搜索的结果的抓图:

同时也希望大家能够喜欢99收藏夹,谢谢大家!



浙公网安备 33010602011771号