xuwenzhuo

.net lucene 实战搜索(三)----- 基本之搜索

    上一节主要叙述了索引的构建,这一节主要谈谈资料的搜索。上回说过,索引本身也相当于一个数据库,它也有自己的查询语法(作用类似于数据库的 SQL)。
    详细请参阅:http://lucene.apache.org/java/docs/queryparsersyntax.html 这里不一一详细说明。
    

查询参数类别
// Kind of a single query condition, stored in QueryItem.Type.
// How each kind is translated into a Lucene query is decided by code that is not
// part of this listing (presumably Formater.FormatSearchItem - TODO confirm).
// The numeric values are explicit; keep them stable because QueryItem is [Serializable].
public enum QueryItemType
    {
        And = 0,        // default type of a newly created QueryItem
        Or = 1,
        Range = 2,      // presumably a range between QueryItem.Value and Value_1 - TODO confirm
        Fuzzy = 3,
        Wildcard = 4,
        TimeRange = 5   // presumably a date/time range between Value and Value_1 - TODO confirm
    }

查询条件参数

    [Serializable]

    public class QueryItem
    {
        private string fieldText = string.Empty;
        private string fieldName = string.Empty;
        private string value = string.Empty;
        private string value_1 = string.Empty;
        private QueryItemType type = QueryItemType.And;
        string[] specialcharacters = new string[] { "+", "-", "&&", "||", "!", "(", ")", "{", "}", "[", "]", "^", "\"", "~", "*", "?", " :", "\\" };
        private bool isToLower = true;
        private bool isTextQuery = false;
        public QueryItem()
        {

        }
        public QueryItem(string sfieldName, string svalue, string svalue_1, QueryItemType stype)
        {
            fieldName = sfieldName;
            Value = svalue;
            Value_1 = svalue_1;
            type = stype;
        }
        public QueryItem(string ftext, string sfieldName, string svalue, string svalue_1, QueryItemType stype)
        {
            this.fieldText = ftext;
            fieldName = sfieldName;
            Value = svalue;
            Value_1 = svalue_1;
            type = stype;
        }

        public string FieldName
        {
            get
            {
                return fieldName;
            }
            set
            {
                fieldName = value;
            }
        }

        public string Value
        {
            get
            {
                return this.value;
            }
            set
            {
                string temp = value;
                foreach (string str in specialcharacters)
                {
                    temp.Replace(str, " ");
                }
                this.value = temp;
            }
        }

        public string Value_1
        {
            get
            {
                return value_1;
            }
            set
            {
                string temp = value;
                foreach (string str in specialcharacters)
                {
                    temp.Replace(str, " ");
                }
                value_1 = temp;
            }
        }

        public QueryItemType Type
        {
            get
            {
                return type;
            }
            set
            {
                type = value;
            }
        }

        public string FieldText
        {
            get
            {
                return fieldText;
            }
            set
            {
                fieldText = value;
            }
        }

        public bool IsToLower
        {
            get { return isToLower; }
            set { isToLower = value; }
        }

        public bool IsTextQuery
        {
            get { return isTextQuery; }
            set { isTextQuery = value; }
        }
    }

搜索参数

 [Serializable]
    public class SearchParameter
    {
        string[] stopwords = new string[] {"[","]","+","-","or","and","to",":","~","*",
                    "!","@","#","$","%","^","&","*","(",")","|","?","_","\\","//","'","\"",";","{","}","=","<",">"};
        private int _pagesize = 100;
        private int _pageindex = 1;
        private List<QueryItem> queryitem = new List<QueryItem>();
        private Hashtable _keywordfilter = new Hashtable();
        private AnalyzerEnum _analyzertype;
        private string _indexdir;
        private List<SortItem> _sortItems;

        public string IndexDir
        {
            get { return _indexdir; }
            set { _indexdir = value; }
        }

        public int PageSize
        {
            get { return _pagesize; }
            set { _pagesize = value; }
        }

        public int PageIndex
        {
            get { return _pageindex; }
            set { _pageindex = value; }
        }

        public List<QueryItem> QueryItems
        {
            get { return queryitem; }
            set
            {

                foreach (QueryItem item in value)
                {
                    item.Value = ReplaceStr(item.Value);
                }
                queryitem = value;
            }
        }

        public Hashtable KeywordFilter
        {
            get { return _keywordfilter; }
        }

        public void AddKeywordFilter(string QueryField, string QueryText)
        {
            _keywordfilter.Add(QueryField, QueryText);

        }

        public List<SortItem> SortItems
        {
            set
            {
                _sortItems = value;
            }
        }

        public SortField[] SortFields
        {
            get
            {
                if (_sortItems == null) return null;
                SortField[] sortfield = new SortField[_sortItems.Count];
                for (int i = 0; i < _sortItems.Count; i++)
                {
                    SortField sortitem = new SortField(ReplaceStr(_sortItems[i].FieldName.ToUpper()), _sortItems[i].ASC);
                    sortfield[i] = sortitem;
                }
                return sortfield;
            }
        }

        public void RemoveKeywordFilter(string QueryField)
        {
            _keywordfilter.Remove(QueryField);
        }

        public AnalyzerEnum AnalyzerType
        {
            set { _analyzertype = value; }
            get { return _analyzertype; }
        }

        public string GetQueryText()
        {
            if (queryitem.Count > 0)
            {
                foreach (QueryItem item in queryitem)
                {
                    if (item.IsTextQuery)
                    {
                        return item.Value;
                    }
                }
            }
            return string.Empty;
        }

        private string ReplaceStr(string source)
        {
            for (int i = 0; i < stopwords.Length; i++)
            {
                source = source.Replace(stopwords[i], " ");
            }
            return source;
        }
    }

    [Serializable]
    public class SortItem
    {
        //是否降序。true降序,false升序
        public SortItem()
        {

        }

        //是否降序。true降序,false升序
        public SortItem(string fieldName, bool asc)
        {
            FieldName = fieldName;
            ASC = asc;
        }

        public string FieldName = string.Empty;
        public bool ASC = true;
    }

以上代码说明:查询条件和数据库一样,有 or、and、not 等条件,QueryItemType 就是用来设置条件类型的。
QueryItem 是单个查询条件,多个 QueryItem 组合起来即可实现多条件查询。SortItem 用于排序,当然也可以多条件排序。注意:默认情况下 Lucene 是按匹配度(相关性)从高到低排序的。
这里只是对单个索引进行查询,事实上,Lucene 是支持多索引分布式查询的。

//搜索器

/// <summary>
/// Wrapper around a Lucene IndexSearcher for a single index: paged searches
/// returned as a DataTable, hit counting and document-id lookup.
/// The DataTable/count methods are best-effort: a failure during the search is
/// written to System.Diagnostics.Trace and the method returns what it has built
/// so far instead of throwing.
/// </summary>
public class Searcher
    {
        // Regex replacement used by SearchData; "$0" stands for the matched token.
        private const string TokenHighlightReplacement = "<font color='red'><b>$0</b></font>";

        // Format used by SearchDataAllField; {0} is the highlighted query value.
        private const string ValueHighlightFormat = "<font color=orangered><b>{0}</b></font>";

        /// <summary>Same as the four-argument overload without highlighted fields.</summary>
        public DataTable SearchData(SearchParameter parm, string fields, out  int count)
        {
            return SearchData(parm, fields, "", out count);
        }

        /// <summary>
        /// Runs the query and returns one page of results with the requested fields
        /// as string columns.
        /// </summary>
        /// <param name="parm">Search request (index directory, conditions, paging, sort).</param>
        /// <param name="fields">Comma-separated field names; upper-cased before use.</param>
        /// <param name="lightfields">
        /// Comma-separated names of fields in which the tokens of the free-text
        /// query are wrapped in highlight markup; null or empty disables highlighting.
        /// </param>
        /// <param name="count">Total number of hits; 0 when nothing matched or the search failed early.</param>
        /// <returns>
        /// null when there are no hits; otherwise the page as a table. If an error
        /// occurs the table built so far is returned.
        /// NOTE(review): stored values are inserted into the markup without HTML encoding.
        /// </returns>
        public DataTable SearchData(SearchParameter parm, string fields, string lightfields, out  int count)
        {
            DataTable dt = new DataTable();
            IndexSearcher searcher = null;
            fields = fields.ToUpper();
            lightfields = (lightfields == null) ? string.Empty : lightfields.ToUpper();
            count = 0;
            try
            {
                searcher = new IndexSearcher(parm.IndexDir);
                Hits hits = ExecuteQuery(searcher, parm);
                if (hits == null || hits.Length() <= 0) return null;

                int total = hits.Length();
                count = total;
                int start;
                int end;
                GetPageBounds(parm, total, out start, out end);

                // Create the table columns.
                string[] fieldcolumns = fields.Split(',');
                foreach (string field in fieldcolumns)
                {
                    AddStringColumn(dt, field);
                }

                // Prepare highlighting once, outside the row loop.
                string[] lightfieldNames = lightfields.Split(',');
                string highlightPattern = null;
                if (lightfields.Length > 0)
                {
                    string text = parm.GetQueryText();
                    if (!string.IsNullOrEmpty(text))
                    {
                        highlightPattern = BuildTokenPattern(parm, text);
                    }
                }

                // Copy the page into the table.
                for (int i = start; i < end; i++)
                {
                    DataRow dr = dt.NewRow();
                    foreach (string field in fieldcolumns)
                    {
                        string temp = hits.Doc(i).Get(field);
                        if (highlightPattern != null && temp != null && Array.IndexOf(lightfieldNames, field) >= 0)
                        {
                            // Single pass over the original text, so markup inserted for one
                            // token can never be matched again by another token.
                            temp = System.Text.RegularExpressions.Regex.Replace(temp, highlightPattern, TokenHighlightReplacement);
                        }
                        dr[field] = temp;
                    }
                    dt.Rows.Add(dr);
                }
            }
            catch (Exception err)
            {
                ReportFailure("SearchData", err);
            }
            finally
            {
                if (searcher != null)
                {
                    searcher.Close();
                }
            }
            return dt;
        }

        /// <summary>
        /// Runs the query and returns one page of results with every stored field
        /// of the documents as a string column.
        /// </summary>
        /// <param name="parm">Search request.</param>
        /// <param name="lightfield">
        /// Name of the single field to highlight (upper-cased before use); null or
        /// empty disables highlighting. Only the Value of the first query item is
        /// highlighted. NOTE(review): the original loop stopped after the first
        /// item; confirm whether all items were meant to be highlighted.
        /// </param>
        /// <param name="count">Total number of hits; 0 when nothing matched or the search failed early.</param>
        /// <returns>null when there are no hits; otherwise the page (possibly partial on error).</returns>
        public DataTable SearchDataAllField(SearchParameter parm, string lightfield, out int count)
        {
            DataTable dt = new DataTable();
            IndexSearcher searcher = null;
            lightfield = (lightfield == null) ? string.Empty : lightfield.ToUpper();
            count = 0;
            try
            {
                searcher = new IndexSearcher(parm.IndexDir);
                Hits hits = ExecuteQuery(searcher, parm);
                if (hits == null || hits.Length() <= 0) return null;

                int total = hits.Length();
                count = total;
                int start;
                int end;
                GetPageBounds(parm, total, out start, out end);

                // Create the table columns: first hit first (as before), then any
                // additional fields that only occur in documents of this page.
                AddDocumentColumns(dt, hits, 0);
                for (int i = start; i < end; i++)
                {
                    AddDocumentColumns(dt, hits, i);
                }

                // An empty value cannot be searched for (string.Replace would throw).
                string needle = null;
                if (lightfield.Length > 0 && parm.QueryItems != null && parm.QueryItems.Count > 0 && parm.QueryItems[0] != null)
                {
                    needle = parm.QueryItems[0].Value;
                }
                bool highlight = !string.IsNullOrEmpty(needle);

                // Copy the page into the table.
                for (int i = start; i < end; i++)
                {
                    DataRow dr = dt.NewRow();
                    foreach (Field field in hits.Doc(i).Fields())
                    {
                        string name = field.Name();
                        string temp = field.StringValue();
                        if (highlight && temp != null && name == lightfield)
                        {
                            temp = temp.Replace(needle, string.Format(ValueHighlightFormat, needle));
                        }
                        dr[name] = temp;
                    }
                    dt.Rows.Add(dr);
                }
            }
            catch (Exception err)
            {
                ReportFailure("SearchDataAllField", err);
            }
            finally
            {
                if (searcher != null)
                {
                    searcher.Close();
                }
            }
            return dt;
        }

        /// <summary>
        /// Returns up to <paramref name="count"/> distinct, non-blank values of one
        /// field, in hit order.
        /// </summary>
        /// <param name="parm">Search request.</param>
        /// <param name="fieldName">Field whose values are collected; upper-cased before use.</param>
        /// <param name="count">Maximum number of rows to return.</param>
        /// <returns>
        /// null when there are no hits; otherwise a one-column table. Every value
        /// appears once, even when equal values are not adjacent in the hit list
        /// (the original only compared each value with the previous one).
        /// </returns>
        public DataTable SearchDataDistinct(SearchParameter parm, string fieldName, int count)
        {
            DataTable dt = new DataTable();
            IndexSearcher searcher = null;
            fieldName = fieldName.ToUpper();
            try
            {
                searcher = new IndexSearcher(parm.IndexDir);
                Hits hits = ExecuteQuery(searcher, parm);
                if (hits == null || hits.Length() <= 0) return null;

                // Create the single table column.
                AddStringColumn(dt, fieldName);

                // Copy distinct values into the table.
                Dictionary<string, bool> seen = new Dictionary<string, bool>();
                int total = hits.Length();
                for (int i = 0; i < total && dt.Rows.Count < count; i++)
                {
                    Field field = hits.Doc(i).GetField(fieldName);
                    if (field == null) continue; // document has no such field

                    string current = field.StringValue();
                    if (current == null || current.Trim().Length <= 0) continue;
                    if (seen.ContainsKey(current)) continue;
                    seen.Add(current, true);

                    DataRow dr = dt.NewRow();
                    dr[fieldName] = current;
                    dt.Rows.Add(dr);
                }
            }
            catch (Exception err)
            {
                ReportFailure("SearchDataDistinct", err);
            }
            finally
            {
                if (searcher != null)
                {
                    searcher.Close();
                }
            }
            return dt;
        }

        /// <summary>
        /// Returns the number of hits for the query (sort criteria are ignored),
        /// or 0 when the search fails.
        /// </summary>
        public int SearchCount(SearchParameter parm)
        {
            IndexSearcher searcher = null;
            try
            {
                searcher = new IndexSearcher(parm.IndexDir);
                Query query = Formater.FormatSearchItem(parm);
                Hits hits = searcher.Search(query);
                return hits.Length();
            }
            catch (Exception err)
            {
                ReportFailure("SearchCount", err);
                return 0;
            }
            finally
            {
                if (searcher != null)
                {
                    searcher.Close();
                }
            }
        }

        /// <summary>
        /// Returns the document id of the first hit, or 0 when nothing matches.
        /// Unlike the DataTable methods, exceptions propagate to the caller.
        /// NOTE(review): Lucene document numbers presumably start at 0, so a result
        /// of 0 may be ambiguous - confirm how callers treat it.
        /// </summary>
        public int GetDocID(SearchParameter parm)
        {
            IndexSearcher searcher = new IndexSearcher(parm.IndexDir);
            try
            {
                Hits hits = ExecuteQuery(searcher, parm);
                if (hits == null || hits.Length() <= 0) return 0;
                return hits.Id(0);
            }
            finally
            {
                searcher.Close();
            }
        }

        /// <summary>
        /// Returns the document ids of all hits in hit order (empty list when
        /// nothing matches). Exceptions propagate to the caller.
        /// </summary>
        public List<int> GetDocIDs(SearchParameter parm)
        {
            List<int> slist = new List<int>();
            IndexSearcher searcher = new IndexSearcher(parm.IndexDir);
            try
            {
                Hits hits = ExecuteQuery(searcher, parm);
                if (hits == null) return slist;

                int total = hits.Length();
                for (int i = 0; i < total; i++)
                {
                    slist.Add(hits.Id(i));
                }
                return slist;
            }
            finally
            {
                searcher.Close();
            }
        }

        // Builds the query for parm and runs it, sorted when parm supplies sort fields.
        // SortFields builds a new array on every access, so it is read only once.
        private static Hits ExecuteQuery(IndexSearcher searcher, SearchParameter parm)
        {
            Query query = Formater.FormatSearchItem(parm);
            SortField[] sortFields = parm.SortFields;
            if (sortFields != null)
            {
                return searcher.Search(query, new Sort(sortFields));
            }
            return searcher.Search(query);
        }

        // Computes the half-open hit range [start, end) of the requested page,
        // clamped to [0, total] so an out-of-range page yields no rows instead of an exception.
        private static void GetPageBounds(SearchParameter parm, int total, out int start, out int end)
        {
            start = (parm.PageIndex - 1) * parm.PageSize;
            end = parm.PageIndex * parm.PageSize;
            if (start < 0)
            {
                start = 0;
            }
            if (end > total)
            {
                end = total;
            }
        }

        // Adds a string column unless the table already has one with that name.
        private static void AddStringColumn(DataTable table, string name)
        {
            if (!table.Columns.Contains(name))
            {
                table.Columns.Add(new DataColumn(name, typeof(string)));
            }
        }

        // Adds one string column per field of the hit at the given position.
        private static void AddDocumentColumns(DataTable table, Hits hits, int index)
        {
            foreach (Field field in hits.Doc(index).Fields())
            {
                AddStringColumn(table, field.Name());
            }
        }

        // Tokenizes the free-text query with the configured analyzer and returns a
        // regex alternation matching any token literally (longest first), or null
        // when the analyzer produces no tokens.
        private static string BuildTokenPattern(SearchParameter parm, string text)
        {
            List<string> tokens = new List<string>();
            StringReader reader = new StringReader(text);
            try
            {
                TokenStream ts = Formater.GetAnalyzer(parm.AnalyzerType).TokenStream("", reader);
                Token token;
                while ((token = ts.Next()) != null)
                {
                    string term = token.TermText();
                    if (!string.IsNullOrEmpty(term) && !tokens.Contains(term))
                    {
                        tokens.Add(term);
                    }
                }
            }
            finally
            {
                reader.Close();
            }

            if (tokens.Count == 0)
            {
                return null;
            }

            // Longest first, so a token is preferred over its own prefix.
            tokens.Sort(delegate(string a, string b) { return b.Length.CompareTo(a.Length); });

            string[] escaped = new string[tokens.Count];
            for (int i = 0; i < tokens.Count; i++)
            {
                escaped[i] = System.Text.RegularExpressions.Regex.Escape(tokens[i]);
            }
            return string.Join("|", escaped);
        }

        // Records a swallowed failure so it is not lost silently.
        private static void ReportFailure(string operation, Exception err)
        {
            System.Diagnostics.Trace.TraceError("Searcher.{0} failed: {1}", operation, err);
        }
    }

    以上是对 Lucene 搜索的封装,可分页查询数据,针对单个索引。
Lucene 原本有一个高亮命中词的组件,不知道是不是我比较愚钝,一直没明白怎么用,而且它跟版本捆绑得太紧密,所以我改为先对查询文本分词,再在搜索结果中把命中的词替换成带高亮标记的文本。
下面是应用举例:
 SearchParameter sp = new SearchParameter();
            sp.AnalyzerType = AnalyzerEnum.SmartSegmentAnalyzer;
            sp.IndexDir = UICommon.IndexDir;
            sp.PageIndex = 1;
            QueryItem item;
            List<QueryItem> list;
            Searcher search = new Searcher();
            DataTable mdt;

               item = new QueryItem();
                item.FieldName = "artitype";
                item.Value = ((int)SDataType.Course).ToString();
                list.Add(item);

                sp.QueryItems = list;
                sp.PageSize = 3;
                try
                {
                    mdt = search.SearchData(sp, "courseid,coursename", out ccount);
                }
                catch
                {
                    sp.IndexDir = UICommon.TempIndexDir;
                    mdt = new Searcher().SearchData(sp, "courseid,coursename", out ccount);
                }
这里的结果以表(DataTable)的形式返回,而 Lucene 本身是以 Document 的方式返回的。
以上就是 Lucene 的基本使用:索引 ----〉 搜索。

posted on 2007-10-11 16:53  xuwenzhuo  阅读(1371)  评论(3)    收藏  举报

导航