// Keyword filtering algorithm (关键字过滤算法)

using System;
using System.Collections.Generic;
using System.Text;
using System.Data;
using System.Collections;

namespace BLL.Common
{
   
#region 操作类
   
/// <summary>
    /// Entry point for keyword filtering: exposes a single static helper that
    /// runs a text through a <see cref="KeywordsFilterClass"/>.
    /// </summary>
    public class KeywordsFilter
    {
        #region Keyword filtering

        /// <summary>
        /// Filters the given text against the bad-word list built inside this method.
        /// </summary>
        /// <param name="keywords">Text to filter. May be null or empty.</param>
        /// <returns>
        /// The value returned by <see cref="KeywordsFilterClass.BadwordInKeywords"/>,
        /// or <paramref name="keywords"/> itself when it is null or empty.
        /// </returns>
        public static string Filter(string keywords)
        {
            // Nothing to scan: hand the input back unchanged instead of letting a
            // null reference reach the scanner.
            if (string.IsNullOrEmpty(keywords))
            {
                return keywords;
            }

            // Collection of the words that must be filtered out.
            // NOTE(review): no words are added to this list here, so as written no
            // bad word is registered - presumably the real word list is meant to be
            // loaded at this point; confirm with the owner of this code.
            List<string> badwords = new List<string>();

            KeywordsFilterClass kf = new KeywordsFilterClass();
            return kf.BadwordInKeywords(keywords, badwords);
        }

        #endregion
    }
   
#endregion

   
#region 关键字过滤类
   
/// <summary>
    /// Bad-word filter. Keeps a set of registered bad words together with two
    /// per-character lookup tables, and replaces every bad word found in a text
    /// with "**".
    /// </summary>
    public class KeywordsFilterClass
    {
        // Text written in place of every matched bad word.
        private const string Mask = "**";

        // All registered bad words.
        private readonly HashSet<string> hash = new HashSet<string>();

        // firstCharCheck[c] is true when at least one bad word starts with c.
        // Sized char.MaxValue + 1 so that every char value, including '\uFFFF',
        // is a valid index.
        private readonly BitArray firstCharCheck = new BitArray(char.MaxValue + 1);

        // allCharCheck[c] is true when c occurs anywhere inside some bad word.
        private readonly BitArray allCharCheck = new BitArray(char.MaxValue + 1);

        // Length of the longest registered bad word; bounds the candidate length
        // tried at each scan position.
        private int maxLength = 0;

        /// <summary>
        /// Registers the given bad words in the word set and the character tables.
        /// Words that are already registered, and null or empty entries, are skipped.
        /// </summary>
        /// <param name="badwords">Words to register; a null list registers nothing.</param>
        private void InitHash(List<string> badwords)
        {
            if (badwords == null)
            {
                return;
            }

            foreach (string word in badwords)
            {
                // A null or empty entry can never match and has no first character to index.
                if (string.IsNullOrEmpty(word))
                {
                    continue;
                }

                // HashSet.Add returns false when the word is already present.
                if (!hash.Add(word))
                {
                    continue;
                }

                maxLength = Math.Max(maxLength, word.Length);
                firstCharCheck[word[0]] = true;
                foreach (char c in word)
                {
                    allCharCheck[c] = true;
                }
            }
        }

        /// <summary>
        /// Replaces every bad word found in <paramref name="text"/> with "**".
        /// The text is scanned once from left to right; at each position the
        /// shortest registered word starting there is masked and scanning resumes
        /// immediately after it.
        /// </summary>
        /// <param name="text">Text to filter. Null or empty text is returned unchanged.</param>
        /// <param name="badwords">
        /// Bad words to register before scanning. They are added to the words this
        /// instance already knows from earlier calls.
        /// </param>
        /// <returns>The filtered text.</returns>
        public string BadwordInKeywords(string text, List<string> badwords)
        {
            // Register the words first so the instance state is updated even when
            // there is nothing to scan.
            this.InitHash(badwords);

            if (string.IsNullOrEmpty(text) || maxLength == 0)
            {
                return text;
            }

            // Build the output separately instead of rewriting the text in place:
            // the mask usually has a different length than the matched word, so
            // in-place replacement would invalidate the scan position.
            StringBuilder result = new StringBuilder(text.Length);
            int index = 0;
            while (index < text.Length)
            {
                int matched = MatchLengthAt(text, index);
                if (matched > 0)
                {
                    result.Append(Mask);
                    index += matched;
                }
                else
                {
                    result.Append(text[index]);
                    index++;
                }
            }

            return result.ToString();
        }

        /// <summary>
        /// Returns the length of the shortest registered bad word that starts at
        /// <paramref name="index"/> in <paramref name="text"/>, or 0 when none does.
        /// </summary>
        private int MatchLengthAt(string text, int index)
        {
            // Cheap rejection: no bad word starts with this character.
            if (!firstCharCheck[text[index]])
            {
                return 0;
            }

            int limit = Math.Min(maxLength, text.Length - index);
            for (int length = 1; length <= limit; length++)
            {
                // A character that appears in no bad word ends every candidate here.
                if (!allCharCheck[text[index + length - 1]])
                {
                    break;
                }

                if (hash.Contains(text.Substring(index, length)))
                {
                    return length;
                }
            }

            return 0;
        }
    }
   
#endregion
}

// Originally posted 2010-09-20 16:45 by 单单的忧伤 (Views: 273, Comments: 0)