字符串相似度算法(SQL Server 版和 C# 版)

-- get_semblance_By_2words
--
-- Scores how similar two words are as
--     (length of their longest common substring) * 100 / (length of the longer word)
-- using integer division, so the result is a whole number between 0 and 100.
--
-- Parameters:
--   @word1, @word2  the words to compare. Declared nvarchar so that characters
--                   outside the database code page (e.g. Chinese text passed
--                   as N'...') are compared as themselves instead of being
--                   replaced by '?'.
-- Returns:
--   The score, implicitly converted to nvarchar by the declared return type.
--   0 is returned when either argument is NULL or empty, or when the words
--   share no character at all.
--
-- Notes:
--   * Trailing spaces are ignored (both inputs are right-trimmed).
--   * A common substring may be a single character and may sit anywhere in
--     either word, including at its very end.
--   * Character matching follows the collation rules CHARINDEX applies to its
--     inputs.
CREATE function get_semblance_By_2words
(
@word1 nvarchar(50),
@word2 nvarchar(50)
)
returns nvarchar(4000)
as
begin
declare @len1 int, @len2 int
declare @max_len int      -- length of the longer word: the score denominator
declare @sub_len int      -- candidate common-substring length, tried longest first
declare @start int        -- 1-based start position of the candidate inside @word1

set @word1 = rtrim(@word1)
set @word2 = rtrim(@word2)
set @len1 = len(@word1)
set @len2 = len(@word2)

-- NULL or empty input can share nothing; this also keeps @max_len above zero.
if @len1 is null or @len2 is null or @len1 = 0 or @len2 = 0
    return 0

set @max_len = case when @len1 < @len2 then @len2 else @len1 end
-- A common substring can never be longer than the shorter word.
set @sub_len = case when @len1 < @len2 then @len1 else @len2 end

while @sub_len >= 1
begin
    set @start = 1
    -- Stop at the last position where a full @sub_len-character substring
    -- still fits, so SUBSTRING never returns a truncated piece.
    while @start <= @len1 - @sub_len + 1
    begin
        -- Lengths are tried in descending order, so the first hit is the longest.
        if charindex(substring(@word1, @start, @sub_len), @word2) > 0
            return @sub_len * 100 / @max_len
        set @start = @start + 1
    end
    set @sub_len = @sub_len - 1
end

return 0
end
GO
 
--测试
--select dbo.get_semblance_By_2words('我是谁','我是谁啊') 
--75

--相似度 

c#------------------------------------------------------

using System;
using System.Collections.Generic;
using System.Text;

namespace ConsoleApplication6
{
    class semblance
    {

        static void Main(string[] args)
        {
            string re= get_semblance_By_2words("我是谁", "我是谁啊");
            Console.WriteLine(re);
            Console.ReadLine();
        }

        public static string get_semblance_By_2words(string word1, string word2)
        {
            int re = 0;
            int maxLength;
            int i, l;
            List<string> tb1 = new List<string>();
            List<string> tb2 = new List<string>();
            i = 0;
            l = 1;
            maxLength = word1.Length;
            if (word1.Length < word2.Length)
                maxLength = word2.Length;
            while (l <= word1.Length)
            {
                while (i < word1.Length - 1)
                {
                    if (i + l > word1.Length)
                        break;
                    tb1.Add(word1.Substring(i, l));
                    i++;
                }
                i = 0;
                l++;
            }

            i = 0;
            l = 1;

            while (l <= word2.Length)
            {
                while (i < word2.Length - 1)
                {
                    if (i + l > word2.Length)
                        break;
                    tb2.Add(word2.Substring(i, l));
                    i++;
                }
                i = 0;
                l++;
            }
            foreach (string subStr in tb1)
            {
                int tempRe = 0;
                if (tb2.Contains(subStr))
                {
                    tempRe = subStr.Length * 100 / maxLength;
                    if (tempRe > re)
                        re = tempRe;
                    if (tempRe == 100)
                        break;
                }
            }
            return re.ToString()+"%";
        }
    }
}


posted on 2009-03-11 15:03  ATAK  阅读(1328)  评论(0)  编辑  收藏  举报

导航