Overlap:计算两个字符串共有的不重复词元(token)数量

直接上code:

 1   public class OverlapCoefficient
 2     {
 3         private Collection<string> tokenSet;
 4 
 5         private string[] delimiters;
 6 
 7         public OverlapCoefficient()
 8         {
 9             this.tokenSet = new Collection<string>();
10             this.delimiters = new string[] { " ", "-", "-", "_" };
11         }
12 
13         public int GetSimilarity(string first, string second)
14         {
15             if (first != null && second != null)
16             {
17                 return this.GetOverlapCount(this.Tokenize(first), this.Tokenize(second));
18             }
19 
20             return 0;
21         }
22 
23 
24         public int GetOverlapCount(Collection<string> firstTokens, Collection<string> secondTokens)
25         {
26             this.tokenSet.Clear();
27             int firstSetTokenCount = this.CalculateUniqueTokensCount(firstTokens);
28             int secondSetTokenCount = this.CalculateUniqueTokensCount(secondTokens);
29             this.MergeIntoSet(firstTokens);
30             this.MergeIntoSet(secondTokens);
31 
32             int overlap = firstSetTokenCount + secondSetTokenCount - this.tokenSet.Count;
33             return overlap;
34         }
35 
36         private void MergeIntoSet(Collection<string> firstTokens)
37         {
38             foreach (string local in firstTokens)
39             {
40                 if (!this.tokenSet.Contains(local))
41                 {
42                     this.tokenSet.Add(local);
43                 }
44             }
45         }
46 
47         private Collection<string> Tokenize(string word)
48         {
49             Collection<string> res = new Collection<string>(word.Split(this.delimiters, StringSplitOptions.RemoveEmptyEntries).ToArray());
50             return res;
51         }
52 
53         private int CalculateUniqueTokensCount(Collection<string> tokenList)
54         {
55             Collection<string> collection = new Collection<string>();
56             foreach (string local in tokenList)
57             {
58                 if (!collection.Contains(local))
59                 {
60                     collection.Add(local);
61                 }
62             }
63 
64             return collection.Count;
65         }
66     }

 

  

posted @ 2018-02-07 14:58  学海无涯,赤子之心  阅读(198)  评论(0)  编辑  收藏  举报