代码改变世界

快速从多个txt文本里面查找重复号码

2011-05-05 14:02  曾祥展  阅读(649)  评论(3)  编辑  收藏

批量快速查找重复号码小程序 界面如下:

 

txt文本里面数据格式:一个号码一行

重点代码:

/// <summary>
/// Builds a human-readable report of the lines that occur more than once in a text file,
/// advancing <c>progressBar1</c> while the distinct lines are scanned.
/// </summary>
/// <param name="Path">Path of the text file to scan; forwarded unchanged to <c>GetArrayListFromTxt</c>.</param>
/// <returns>
/// One "count + line" row per repeated line followed by a summary, or a
/// "no duplicates" message when no line occurs more than once.
/// </returns>
public string GetRepeatStrFromArrayList(string Path)
{
    Dictionary<string, int> array = GetArrayListFromTxt(Path);

    // The upper bound does not change while iterating, so set it once up front.
    this.progressBar1.Maximum = array.Count;

    // Fully qualified so the block does not depend on a "using System.Text;" directive.
    System.Text.StringBuilder report = new System.Text.StringBuilder();
    int processed = 0;

    foreach (var item in array)
    {
        // Count the entry before publishing progress so the bar ends exactly at Maximum.
        processed++;
        this.progressBar1.Value = processed;

        if (item.Value > 1)
        {
            report.Append(item.Value).Append("个   ").Append(item.Key).Append("\r\n");
        }
    }

    if (report.Length == 0)
    {
        return "查找完毕!没有重复!";
    }

    report.Append("\r\n").Append("以上是重复的!");
    report.Append("总共:").Append(array.Count).Append("个不重复!  ");
    report.Append("查找完毕!");

    return report.ToString();
}


/// <summary>
/// Reads a text file line by line and counts how many times each distinct line occurs.
/// </summary>
/// <param name="Path">Path of the text file to read (one entry per line).</param>
/// <returns>
/// A dictionary mapping each distinct line to its number of occurrences. When the file
/// cannot be opened or read, the entries collected so far (possibly none) are returned
/// instead of an exception being thrown.
/// </returns>
public static Dictionary<string, int> GetArrayListFromTxt(string Path)
{
    Dictionary<string, int> dct = new Dictionary<string, int>();

    try
    {
        // Open read-only and never create the file: this method only reads.
        // The using blocks release the handle even when reading fails part-way.
        using (FileStream fs = new FileStream(Path, FileMode.Open, FileAccess.Read))
        using (StreamReader sr = new StreamReader(fs, System.Text.Encoding.Default))
        {
            string strLine;

            // ReadLine returns null at end of file; stop before using it as a dictionary key.
            while ((strLine = sr.ReadLine()) != null)
            {
                // TryGetValue leaves count at 0 for a line seen for the first time.
                int count;
                dct.TryGetValue(strLine, out count);
                dct[strLine] = count + 1;
            }
        }
    }
    catch (Exception ex)
    {
        // Callers rely on this method being best-effort (it never threw before),
        // so keep returning what was read, but leave a trace instead of failing silently.
        System.Diagnostics.Trace.WriteLine("GetArrayListFromTxt: failed to read '" + Path + "': " + ex);
    }

    return dct;
}

 

 

 

 

 //找出连续相同的个数(不超过n)

// Groups consecutive lines that share the same first m characters and writes to
// txtListPhone only the groups that contain fewer than n lines.
var a = GetListFromTxt(txtPath.Text);
if (a.Count() < 1)
{
    return;
}
if (checkBox1.Checked == true)
{
    // Optional sort; after sorting, lines with equal prefixes are expected to be adjacent.
    a = a.OrderBy(x => x).ToList();
}
int i, num;
string last;
List<string> list = new List<string>();
StringBuilder s = new StringBuilder();
int total = a.Count();
for (num = 1, last = a[0], i = 1; i < total; ++i)
{
    // CompareOrdinal compares at most m characters and tolerates lines shorter than m,
    // where Substring(0, m) would throw ArgumentOutOfRangeException.
    if (string.CompareOrdinal(last, 0, a[i], 0, m) == 0)
    {
        ++num;
        list.Add(a[i]);
    }
    else
    {
        // The run ended: put its first line back in front and emit it if it is short enough.
        list.Insert(0, last);
        if (num < n)
        {
            foreach (string item in list)
            {
                s.AppendLine(item);
            }
        }
        list.Clear();
        last = a[i];
        num = 1;
    }
}
// Flush the final run, which the loop above never closes.
if (num < n)
{
    list.Insert(0, last);
    foreach (string item in list)
    {
        s.AppendLine(item);
    }
}
list.Clear();
// Drop the trailing line break left by the last AppendLine.
txtListPhone.Text = s.ToString().TrimEnd('\r', '\n');

 


  /// 相似度匹配算法
  public class LevenshteinDistance
  {
        private static LevenshteinDistance _instance = null;
        public static LevenshteinDistance Instance
        {
            get
            {
                if (_instance == null)
                {
                    return new LevenshteinDistance();
                }
                return _instance;
            }
        }

        /// <summary>
        
/// 取最小的一位数
        
/// </summary>
        
/// <param name=”first”></param>
        
/// <param name=”second”></param>
        
/// <param name=”third”></param>
        
/// <returns></returns>
        public int LowerOfThree(int first, int second, int third)
        {
            int min = Math.Min(first, second);
            return Math.Min(min, third);
        }
        /// <summary>
        
/// 
        
/// </summary>
        
/// <param name="str1"></param>
        
/// <param name="str2"></param>
        
/// <returns></returns>
        public int Levenshtein_Distance(string str1, string str2)
        {
            int[,] Matrix;
            int n = str1.Length;
            int m = str2.Length;

            int temp = 0;
            char ch1;
            char ch2;
            int i = 0;
            int j = 0;
            if (n == 0)
            {
                return m;
            }
            if (m == 0)
            {

                return n;
            }
            Matrix = new int[n + 1, m + 1];

            for (i = 0; i <= n; i++)
            {
                //初始化第一列
                Matrix[i, 0] = i;
            }

            for (j = 0; j <= m; j++)
            {
                //初始化第一行
                Matrix[0, j] = j;
            }

            for (i = 1; i <= n; i++)
            {
                ch1 = str1[i - 1];
                for (j = 1; j <= m; j++)
                {
                    ch2 = str2[j - 1];
                    if (ch1.Equals(ch2))
                    {
                        temp = 0;
                    }
                    else
                    {
                        temp = 1;
                    }
                    Matrix[i, j] = LowerOfThree(Matrix[i - 1, j] + 1, Matrix[i, j - 1] + 1, Matrix[i - 1, j - 1] + temp);

                }
            }

            //for (i = 0; i <= n; i++)
            
//{
            
//    for (j = 0; j <= m; j++)
            
//    {
            
//        Console.Write(" {0} ", Matrix[i, j]);
            
//    }
            
//    Console.WriteLine("");
            
//}
            return Matrix[n, m];
        }

        /// <summary>
        
/// 计算字符串相似度
        
/// </summary>
        
/// <param name=”str1″></param>
        
/// <param name=”str2″></param>
        
/// <returns></returns>
        public decimal LevenshteinDistancePercent(string str1, string str2)
        {
            int val = Levenshtein_Distance(str1, str2);
            return 1 - (decimal)val / Math.Max(str1.Length, str2.Length);
        }
    }