Lucene.NET 开发实现
最近在帮一个朋友的忙,帮他们的一个软件设计架构。该应用程序的某个核心逻辑涉及的数据量较大,客户对查询的要求又很高。这种需求除了在数据库设计时要考虑水平分表、分区视图之类的方案,在程序中也要考虑效率问题,于是就决定使用LUCENE.NET为核心数据建立索引文件,做全文搜索,这样就算数据量有千万级别,查询也能在几秒钟内完成,对性能还是有很大帮助。之前没仔细了解过LUCENE方面的技术,正好学习一下。
LUCENE.NET是从JAVA版Lucene移植到.NET平台上的开源技术,技术资料也很丰富。
以下是创建索引代码:
/// <summary>
/// Pages through the auto parts returned by <c>CustomerQuery.QueryAutoParts</c>,
/// keeps the ones accepted by <c>IsValidProduct</c>, and passes the collected
/// list to <c>Write</c>.
/// </summary>
public static void Run()
{
    // Paging cursor. It is passed by ref to the query below, so the query
    // is able to modify it between iterations.
    QryPage qryPage = new QryPage
    {
        PerPageSize = 350,
        PageNumber = 0,
        PageCount = 10000,
        NeedInitPageNo = false
    };

    List<AutoParts> packages = new List<AutoParts>();
    for (; qryPage.PageNumber < qryPage.PageCount; qryPage.PageNumber++)
    {
        // Fetch the next page of data to index.
        IList<AutoParts> currentPage = new CustomerQuery().QueryAutoParts(new AutoPartDTO(), ref qryPage);
        foreach (var part in currentPage)
        {
            if (IsValidProduct(part))
            {
                packages.Add(part);
            }
        }
    }

    // Write search item index to file.
    Write(packages);
}
/// <summary>
/// Writes the given parts to the search index by delegating to <c>build</c>.
/// </summary>
/// <param name="packages">Parts to index.</param>
public static void Write(List<AutoParts> packages)
{
    build(packages);
}
/// <summary>
/// Builds the product index at <c>Common.ProductIndexPath</c> from the given parts.
/// The writer is opened with its <c>create</c> argument set to <c>true</c>.
/// </summary>
/// <param name="packages">Parts to index.</param>
/// <remarks>
/// Exceptions raised after the writer has been opened are written to the console
/// and not rethrown. The writer is closed in every case.
/// </remarks>
public static void build(List<AutoParts> packages)
{
    var writer = new IndexWriter(Common.ProductIndexPath, new EsayTooAnalyzer(), true);
    try
    {
        DateTime start;
        try
        {
            writer.SetMaxFieldLength(1000);
            writer.SetUseCompoundFile(true);
            Logger.Info("Indexing to directory '" + Common.ProductIndexPath + "'...");
            start = DateTime.UtcNow;
            indexDocs(writer, packages);
            Logger.Info("Optimizing...");
            writer.Optimize();
        }
        finally
        {
            // Close the writer even when indexing or optimizing fails, so it
            // does not keep holding the index directory.
            writer.Close();
        }

        // Report real milliseconds; a raw Ticks difference is in 100 ns units.
        double elapsedMilliseconds = (DateTime.UtcNow - start).TotalMilliseconds;
        Logger.Info(elapsedMilliseconds + " total milliseconds");
    }
    catch (Exception e)
    {
        Console.WriteLine(e.Message);
    }
}
/// <summary>
/// Replaces the indexed document for a part: deletes every document whose
/// "id" term equals <c>dto.Id</c>, then adds a fresh document built from <paramref name="dto"/>.
/// </summary>
/// <param name="dto">Part whose index entry is to be replaced.</param>
/// <remarks>
/// Exceptions are written to the console and not rethrown.
/// </remarks>
public static void UpdateIndex(AutoParts dto)
{
    try
    {
        Term tm = new Term("id", dto.Id.ToString());

        // Step 1: delete the old document(s). The reader must be closed before
        // the writer is opened: closing is what commits the deletion and lets
        // go of the index so the writer can take it over.
        var reader = IndexReader.Open(Common.ProductIndexPath);
        try
        {
            reader.DeleteDocuments(tm);
        }
        finally
        {
            reader.Close();
        }

        // Step 2: append the new version of the document.
        var writer = new IndexWriter(Common.ProductIndexPath, new EsayTooAnalyzer(), false);
        try
        {
            AddDocument(dto, writer);
            writer.Optimize();
        }
        finally
        {
            writer.Close();
        }
    }
    catch (Exception e)
    {
        Console.WriteLine("添加索引出错,配件ID:" + dto.Id + "\n");
        Console.Write(e.Message);
    }
}
/// <summary>
/// Appends one part to the existing product index and optimizes the index.
/// </summary>
/// <param name="dto">Part to add.</param>
/// <remarks>
/// On failure the error is written to the console and the exception is rethrown.
/// The writer, once opened, is closed in every case.
/// </remarks>
public static void AddDocument(AutoParts dto)
{
    try
    {
        var writer = new IndexWriter(Common.ProductIndexPath, new EsayTooAnalyzer(), false);
        try
        {
            AddDocument(dto, writer);
            writer.Optimize();
        }
        finally
        {
            // Close the writer even when adding or optimizing throws, so it
            // does not keep holding the index directory.
            writer.Close();
        }
    }
    catch (Exception e)
    {
        Console.WriteLine("添加索引出错,配件ID:"+dto.Id+"\n");
        Console.WriteLine(e.Message);
        throw;
    }
}
/// <summary>
/// Builds a Lucene document from one part and adds it to the given writer.
/// </summary>
/// <param name="package">Part to convert into a document.</param>
/// <param name="getWriter">Open index writer that receives the document.</param>
private static void AddDocument(AutoParts package, IndexWriter getWriter)
{
    Document doc = new Document();

    // Searchable fields, indexed as single un-analyzed terms.
    doc.Add(new Field("id", package.Id.ToString(), Field.Store.YES, Field.Index.UN_TOKENIZED));
    doc.Add(new Field("CarCategoryId", package.CarCategoryId.ToString(), Field.Store.YES, Field.Index.UN_TOKENIZED));
    doc.Add(new Field("Name", package.Name, Field.Store.YES, Field.Index.UN_TOKENIZED));
    doc.Add(new Field("Code", package.Code, Field.Store.YES, Field.Index.UN_TOKENIZED));

    // Stored-only fields: returned with hits but not searchable.
    doc.Add(new Field("FSPrice", package.FSPrice, Field.Store.YES, Field.Index.NO));
    doc.Add(new Field("YCPrice", package.YCPrice, Field.Store.YES, Field.Index.NO));
    doc.Add(new Field("YCCost", package.YCCost, Field.Store.YES, Field.Index.NO));
    doc.Add(new Field("YCSupplier", package.YCSupplier, Field.Store.YES, Field.Index.NO));
    doc.Add(new Field("FCPrice", package.FCPrice, Field.Store.YES, Field.Index.NO));
    doc.Add(new Field("FCCost", package.FCCost, Field.Store.YES, Field.Index.NO));
    // Store the supplier itself here, not the cost.
    doc.Add(new Field("FCSupplier", package.FCSupplier, Field.Store.YES, Field.Index.NO));
    doc.Add(new Field("CCPrice", package.CCPrice, Field.Store.YES, Field.Index.NO));
    doc.Add(new Field("CCCost", package.CCCost, Field.Store.YES, Field.Index.NO));
    doc.Add(new Field("CCSupplier", package.CCSupplier, Field.Store.YES, Field.Index.NO));
    doc.Add(new Field("Repire", package.Repire, Field.Store.YES, Field.Index.NO));
    doc.Add(new Field("AskPriceInfo", package.AskPriceInfo, Field.Store.YES, Field.Index.NO));
    doc.Add(new Field("AskCustomer", package.AskCustomer, Field.Store.YES, Field.Index.NO));
    doc.Add(new Field("Description", package.Description, Field.Store.YES, Field.Index.NO));
    doc.Add(new Field("Picture1", package.Picture1.ToString(), Field.Store.YES, Field.Index.NO));
    doc.Add(new Field("Picture2", package.Picture2.ToString(), Field.Store.YES, Field.Index.NO));
    doc.Add(new Field("Picture3", package.Picture3.ToString(), Field.Store.YES, Field.Index.NO));
    doc.Add(new Field("IsAvaliable", package.IsAvaliable.ToString(), Field.Store.YES, Field.Index.NO));

    // The only analyzed field: run through the writer's analyzer.
    doc.Add(new Field("CarTypeTags", package.CarTypeTags, Field.Store.YES, Field.Index.TOKENIZED));

    // NOTE(review): ToShortDateString() is culture-dependent and its output does not
    // generally sort chronologically as a string, while CreateFilter runs a term range
    // over this field. Confirm the range filter behaves as intended.
    doc.Add(new Field("ModifiedTime", package.ModifiedTime.ToShortDateString(), Field.Store.YES, Field.Index.UN_TOKENIZED));
    doc.Add(new Field("ModifiedBy", package.ModifiedBy, Field.Store.YES, Field.Index.NO));

    getWriter.AddDocument(doc);
}
/// <summary>
/// Adds every part in <paramref name="packages"/> to the writer, printing a
/// 1-based progress counter to the console before each one.
/// </summary>
/// <param name="writer">Open index writer that receives the documents.</param>
/// <param name="packages">Parts to index.</param>
/// <remarks>
/// The first exception stops the loop; its message is written to the console
/// and it is not rethrown.
/// </remarks>
private static void indexDocs(IndexWriter writer, List<AutoParts> packages)
{
    try
    {
        for (int position = 0; position < packages.Count; position++)
        {
            Console.WriteLine("生成索引顺序" + (position + 1));
            AddDocument(packages[position], writer);
        }
    }
    catch (Exception e)
    {
        Console.Write(e.Message);
    }
}
/// <summary>
/// Decides whether a part should be indexed. Currently accepts every part.
/// </summary>
/// <param name="autoParts">Part to check (not inspected at present).</param>
/// <returns>Always <c>true</c>.</returns>
private static bool IsValidProduct(AutoParts autoParts)
{
    return true;
}
}
其中更新索引的方法还在调试,因为发现删除索引的操作不成功。
下面是查询的核心算法,其中也包含了分页查询,完全可以按照与数据库一致的方式来进行查询,核心数据的底层查询按如下方式实现即可:
/// <summary>
/// Runs a paged search against the product index.
/// </summary>
/// <param name="dto">Search criteria; turned into a query by <c>CreateQuery</c> and <c>CreateFilter</c>.</param>
/// <param name="page">
/// Paging state. A <c>PageNumber</c> of 0 is changed to 1. <c>TotalCount</c> and
/// <c>PageCount</c> are overwritten from the search result.
/// </param>
/// <returns>
/// The result of <c>TopDocs2Data</c> for the requested page, or an empty list
/// when the search throws.
/// </returns>
public static List<AutoPartDTO> Query(QueryCritiriaDTO dto, ref QryPage page)
{
    if (page.PageNumber == 0)
    {
        page.PageNumber = 1;
    }

    Sort sort = new Sort(new SortField("id", SortField.DOC, false));
    Query query = CreateQuery(dto);
    MutiFilter filter = CreateFilter(dto);
    query = filter.getFilterQuery(query);

    var productIndexReader = IndexReader.Open(Common.ProductIndexPath);
    try
    {
        IndexSearcher searcher = new IndexSearcher(productIndexReader);
        try
        {
            // Fetch everything up to the end of the requested page; the page
            // itself is cut out of that by TopDocs2Data.
            TopDocs topDocs = searcher.Search(query, null, page.PageNumber * page.PerPageSize, sort);
            page.TotalCount = topDocs.totalHits;
            page.PageCount = (int)Math.Ceiling((decimal)page.TotalCount / (decimal)page.PerPageSize);
            if (page.PageCount == 1 || page.PageCount == 0)
            {
                return TopDocs2Data(searcher, topDocs.scoreDocs);
            }
            return TopDocs2Data(searcher, topDocs.scoreDocs, page);
        }
        catch (Exception e)
        {
            Console.WriteLine("查询出错");
            Console.WriteLine(e.Message);
            return new List<AutoPartDTO>();
        }
        finally
        {
            searcher.Close();
        }
    }
    finally
    {
        // A searcher built on an existing reader does not close that reader,
        // so it has to be closed here.
        productIndexReader.Close();
    }
}
/// <summary>
/// Translates the search criteria into a boolean query in which every
/// supplied criterion is a required (MUST) clause.
/// </summary>
/// <param name="dto">Search criteria. Category values of -1 or 0 and null/empty strings are skipped.</param>
/// <returns>The combined query; it has no clauses when no criterion is supplied.</returns>
private static Query CreateQuery(QueryCritiriaDTO dto)
{
    var criteria = new BooleanQuery();

    bool hasLevel3Category = !(dto.CatetoryL3 == -1 || dto.CatetoryL3 == 0);
    if (hasLevel3Category)
    {
        Term categoryTerm = new Term("CarCategoryId", dto.CatetoryL3.ToString());
        criteria.Add(new TermQuery(categoryTerm), BooleanClause.Occur.MUST);
    }

    bool hasLevel4Category = !(dto.CatetoryL4 == -1 || dto.CatetoryL4 == 0);
    if (hasLevel4Category)
    {
        // NOTE(review): a fuzzy match on an id string can also match other,
        // similar-looking ids — confirm this is intended.
        Term tagTerm = new Term("CarTypeTags", dto.CatetoryL4.ToString());
        criteria.Add(new FuzzyQuery(tagTerm, 0.3f), BooleanClause.Occur.MUST);
    }

    if (!string.IsNullOrEmpty(dto.Name))
    {
        Term nameTerm = new Term("Name", dto.Name);
        criteria.Add(new TermQuery(nameTerm), BooleanClause.Occur.MUST);
    }

    if (!string.IsNullOrEmpty(dto.Code))
    {
        Term codeTerm = new Term("Code", dto.Code);
        criteria.Add(new TermQuery(codeTerm), BooleanClause.Occur.MUST);
    }

    if (!string.IsNullOrEmpty(dto.SupplierId))
    {
        // NOTE(review): the indexing code visible in this file writes no
        // "SupplierId" field, so this clause may never match — confirm.
        Term supplierTerm = new Term("SupplierId", dto.SupplierId);
        criteria.Add(new TermQuery(supplierTerm), BooleanClause.Occur.MUST);
    }

    return criteria;
}
/// <summary>
/// Builds the filter set for a search. A "ModifiedTime" range filter is added
/// only when both <c>dto.Start</c> and <c>dto.End</c> differ from
/// <c>DateTimeUtil.MIN_DATETIME</c>.
/// </summary>
/// <param name="dto">Search criteria carrying the optional date range.</param>
/// <returns>A filter set holding zero or one range filter.</returns>
private static MutiFilter CreateFilter(QueryCritiriaDTO dto)
{
    MutiFilter filters = new MutiFilter();

    // Without a complete date range there is nothing to filter on.
    if (dto.Start == CP.Utils.DateTimeUtil.MIN_DATETIME || dto.End == CP.Utils.DateTimeUtil.MIN_DATETIME)
    {
        return filters;
    }

    string rangeStart = dto.Start.ToShortDateString();
    string rangeEnd = dto.End.ToShortDateString();
    filters.AddRangeFilter("ModifiedTime", rangeStart, rangeEnd);
    return filters;
}
#region 获取最终的数据
/// <summary>
/// Converts the hits belonging to the requested page into DTOs.
/// </summary>
/// <param name="searcher">Searcher used to load each hit's stored document.</param>
/// <param name="scoreDoc">Hits from the first result up to the end of the requested page.</param>
/// <param name="page">Paging state; <c>PageNumber</c>, <c>PerPageSize</c> and <c>TotalCount</c> are read.</param>
/// <returns>One DTO (Id and Name only) per hit on the page; empty when the page has no hits.</returns>
private static List<AutoPartDTO> TopDocs2Data(IndexSearcher searcher, ScoreDoc[] scoreDoc, QryPage page)
{
    // Half-open window [first, limit) of the page, capped at the total hit count.
    int first = (page.PageNumber - 1) * page.PerPageSize;
    int limit = Math.Min(page.PageNumber * page.PerPageSize, page.TotalCount);

    List<AutoPartDTO> results = new List<AutoPartDTO>();
    int position = first;
    while (position < limit)
    {
        Document stored = searcher.Doc(scoreDoc[position].doc);
        results.Add(new AutoPartDTO
        {
            Id = long.Parse(stored.Get("id")),
            Name = stored.Get("Name")
        });
        position++;
    }
    return results;
}
/// <summary>
/// Converts every hit in <paramref name="docs"/> into a DTO.
/// </summary>
/// <param name="searcher">Searcher used to load each hit's stored document.</param>
/// <param name="docs">Hits to convert; may be null or empty.</param>
/// <returns>
/// One DTO (Id and Name only) per hit. An empty list, never null, when there
/// are no hits.
/// </returns>
private static List<AutoPartDTO> TopDocs2Data(IndexSearcher searcher, ScoreDoc[] docs)
{
    List<AutoPartDTO> list = new List<AutoPartDTO>();
    if (docs == null)
    {
        // No hits: hand back an empty list so callers can enumerate the
        // result without a null check.
        return list;
    }

    foreach (ScoreDoc sd in docs)
    {
        Document doc = searcher.Doc(sd.doc);
        AutoPartDTO autoPartDto = new AutoPartDTO();
        autoPartDto.Id = long.Parse(doc.Get("id"));
        autoPartDto.Name = doc.Get("Name");
        list.Add(autoPartDto);
    }
    return list;
}
#endregion
}
/// <summary>
/// Shared settings for the product index.
/// </summary>
public class Common
{
    /// <summary>
    /// Location of the product index: an entry named "auto.index" under the
    /// application's base directory.
    /// </summary>
    public static string ProductIndexPath
    {
        get { return IndexStoredDirectory; }
    }

    // Path.Combine inserts the directory separator when BaseDirectory does not
    // already end with one; plain concatenation would glue the names together.
    private static readonly string IndexStoredDirectory =
        System.IO.Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "auto.index");
}
/// <summary>
/// Collects several Lucene filters and applies all of them to a query.
/// </summary>
public class MutiFilter
{
    // Filters in the order they were added.
    private readonly List<Filter> filterList = new List<Filter>();

    /// <summary>
    /// Adds a filter that keeps documents matching the exact term
    /// <paramref name="Field"/> = <paramref name="Value"/>.
    /// </summary>
    /// <param name="Field">Name of the field to match.</param>
    /// <param name="Value">Term text to match.</param>
    public void AddFilter(String Field, String Value)
    {
        TermQuery exactMatch = new TermQuery(new Term(Field, Value));
        filterList.Add(new QueryFilter(exactMatch));
    }

    /// <summary>
    /// Adds a filter built from a <c>RangeQuery</c> between <paramref name="start"/>
    /// and <paramref name="end"/> on the given field, with the inclusive flag set.
    /// </summary>
    /// <param name="Field">Name of the field to match.</param>
    /// <param name="start">Lower bound term text.</param>
    /// <param name="end">Upper bound term text.</param>
    public void AddRangeFilter(string Field, string start, string end)
    {
        Term lowerBound = new Term(Field, start);
        Term upperBound = new Term(Field, end);
        RangeQuery range = new RangeQuery(lowerBound, upperBound, true);
        filterList.Add(new QueryFilter(range));
    }

    /// <summary>
    /// Wraps <paramref name="query"/> in one <c>FilteredQuery</c> per collected
    /// filter, in insertion order.
    /// </summary>
    /// <param name="query">Query to restrict.</param>
    /// <returns>The wrapped query, or <paramref name="query"/> itself when no filter was added.</returns>
    public Query getFilterQuery(Query query)
    {
        Query narrowed = query;
        foreach (Filter filter in filterList)
        {
            // Each filter narrows the result of the previous wrapping.
            narrowed = new FilteredQuery(narrowed, filter);
        }
        return narrowed;
    }
}
/// <summary>
/// Character tokenizer that treats the ASCII comma and the space as token
/// separators; every other character is part of a token.
/// </summary>
public class EsayTooTokenizer : CharTokenizer
{
    public EsayTooTokenizer(TextReader reader)
        : base(reader)
    {
    }

    /// <summary>
    /// Tells the base tokenizer whether <paramref name="c"/> belongs to a token.
    /// </summary>
    /// <param name="c">Character to classify.</param>
    /// <returns><c>false</c> for ',' and ' '; <c>true</c> otherwise.</returns>
    protected override bool IsTokenChar(char c)
    {
        return c != ',' && c != ' ';
    }
}
/// <summary>
/// Minimal custom analyzer: tokenizes every field with <c>EsayTooTokenizer</c>
/// and applies no further token filters.
/// </summary>
public class EsayTooAnalyzer : Analyzer
{
    /// <summary>
    /// Creates the token stream for a field's text.
    /// </summary>
    /// <param name="fieldName">Name of the field being analyzed (not used).</param>
    /// <param name="reader">Source of the field's text.</param>
    /// <returns>A new <c>EsayTooTokenizer</c> over <paramref name="reader"/>.</returns>
    public override TokenStream TokenStream(string fieldName, System.IO.TextReader reader)
    {
        EsayTooTokenizer tokenizer = new EsayTooTokenizer(reader);
        return tokenizer;
    }
}
浙公网安备 33010602011771号