字符串按照相似度排序

/**
 * Sorts a list of strings by their similarity to a reference string, most similar first.
 *
 * <p>Similarity is derived from the case-insensitive Levenshtein edit distance between two
 * strings, normalised by the length of the longer string so that the result lies in
 * {@code [0, 1]}.
 *
 * @author weihainan.
 * @since 0.1 created on 2017/5/11.
 */
public class SimilarityUtil {

    /**
     * Computes the case-insensitive Levenshtein edit distance between two strings.
     *
     * @param str    the first string, must not be {@code null}
     * @param target the second string, must not be {@code null}
     * @return the minimum number of single-character insertions, deletions and substitutions
     *         needed to turn {@code str} into {@code target}
     */
    private static int compare(String str, String target) {
        int n = str.length();
        int m = target.length();
        if (n == 0) {
            return m;
        }
        if (m == 0) {
            return n;
        }

        // d[i][j] holds the distance between the first i chars of str and the first j of target.
        int[][] d = new int[n + 1][m + 1];
        for (int i = 0; i <= n; i++) {
            d[i][0] = i;                // first column: delete i characters
        }
        for (int j = 0; j <= m; j++) {
            d[0][j] = j;                // first row: insert j characters
        }

        for (int i = 1; i <= n; i++) {
            char ch1 = str.charAt(i - 1);
            for (int j = 1; j <= m; j++) {
                char ch2 = target.charAt(j - 1);
                // Substitution is free when the characters match ignoring case.
                int cost = equalsIgnoreCase(ch1, ch2) ? 0 : 1;
                // Cheapest of: deletion (above), insertion (left), substitution (diagonal).
                d[i][j] = min(d[i - 1][j] + 1, d[i][j - 1] + 1, d[i - 1][j - 1] + cost);
            }
        }
        return d[n][m];
    }

    /**
     * Tells whether two characters are equal ignoring case.
     *
     * <p>Uses the JDK case mappings rather than a fixed offset of 32, which would wrongly
     * equate unrelated characters such as {@code '0'} and {@code 'P'}.
     */
    private static boolean equalsIgnoreCase(char a, char b) {
        if (a == b) {
            return true;
        }
        char upperA = Character.toUpperCase(a);
        char upperB = Character.toUpperCase(b);
        // The extra lower-case check covers characters whose upper-case forms differ
        // while their lower-case forms coincide.
        return upperA == upperB
                || Character.toLowerCase(upperA) == Character.toLowerCase(upperB);
    }

    /** Returns the smallest of three ints. */
    private static int min(int one, int two, int three) {
        return Math.min(Math.min(one, two), three);
    }

    /**
     * Returns the similarity ratio of two strings.
     *
     * @param str    the first string, must not be {@code null}
     * @param target the second string, must not be {@code null}
     * @return a value in {@code [0, 1]}: {@code 1} when the strings are equal ignoring case
     *         (including when both are empty), {@code 0} when every position differs
     * @throws NullPointerException if either argument is {@code null}
     */
    public static float getSimilarityRatio(String str, String target) {
        int longest = Math.max(str.length(), target.length());
        if (longest == 0) {
            // Two empty strings are identical; avoid the 0/0 division that would yield NaN.
            return 1.0f;
        }
        return 1 - (float) compare(str, target) / longest;
    }

    /**
     * Returns the elements of {@code list} ordered by descending similarity to {@code key}.
     *
     * <p>The input list is not modified. Elements with the same similarity keep their
     * relative input order because {@link Collections#sort} is stable.
     *
     * @param key  the reference string, must not be {@code null} when {@code list} is non-empty
     * @param list the strings to sort; may be {@code null} or empty
     * @return a new mutable list with the sorted elements; empty when {@code list} is
     *         {@code null} or empty
     * @throws NullPointerException if {@code key} or any element of a non-empty list is
     *                              {@code null}
     */
    public static List<String> sortBySimilar(String key, List<String> list) {
        if (list == null || list.isEmpty()) {
            return new java.util.ArrayList<String>();
        }
        List<Comp> comps = new java.util.ArrayList<Comp>(list.size());
        for (String str : list) {
            comps.add(new Comp(getSimilarityRatio(str, key), str));
        }
        Collections.sort(comps, new Comparator<Comp>() {
            @Override
            public int compare(Comp o1, Comp o2) {
                // Arguments are swapped to obtain descending order.
                return Float.compare(o2.getRatio(), o1.getRatio());
            }
        });
        List<String> res = new java.util.ArrayList<String>(comps.size());
        for (Comp comp : comps) {
            res.add(comp.getStr());
        }
        return res;
    }

    /** Pairs a string with its similarity ratio so that the pair can be sorted. */
    public static class Comp {
        private String str;
        private float ratio;

        /**
         * @param ratio the similarity ratio associated with {@code str}
         * @param str   the string being ranked
         */
        public Comp(float ratio, String str) {
            this.ratio = ratio;
            this.str = str;
        }

        public float getRatio() {
            return ratio;
        }

        public void setRatio(float ratio) {
            this.ratio = ratio;
        }

        public String getStr() {
            return str;
        }

        public void setStr(String str) {
            this.str = str;
        }

        @Override
        public boolean equals(Object o) {
            if (this == o) {
                return true;
            }
            if (o == null || getClass() != o.getClass()) {
                return false;
            }
            Comp comp = (Comp) o;
            return Float.compare(comp.ratio, ratio) == 0
                    && java.util.Objects.equals(str, comp.str);
        }

        @Override
        public int hashCode() {
            return java.util.Objects.hash(str, ratio);
        }
    }

    /** Small demo: prints the sample list ordered by similarity to {@code "1234"}. */
    public static void main(String[] args) {
        System.out.println(sortBySimilar("1234", java.util.Arrays.asList("1", "1234", "23", "56")));
    }

}

posted @ 2017-05-12 11:23  yweihainan  阅读(2031)  评论(0编辑  收藏  举报