最大匹配字符串LCS,The Longest Common Substring
/// <summary>
/// Direction stored for every cell of the scoring table. Following these
/// directions backwards from the last cell reconstructs the matched characters.
/// </summary>
public enum BackTracking
{
    /// <summary>The cell's score was taken from the cell above (row i - 1).</summary>
    UP,

    /// <summary>The cell's score was taken from the cell to the left (column j - 1).</summary>
    LEFT,

    /// <summary>
    /// Stops the trace-back in <see cref="LCSMatchForString.LCS"/>.
    /// The scoring code in <see cref="LCSBaseMatch"/> never assigns this value.
    /// </summary>
    NEITHER,

    /// <summary>The characters at (i, j) matched; the path continues diagonally.</summary>
    UP_AND_LEFT
}

/// <summary>
/// Base class that scores how well two character sequences match using a
/// longest-common-subsequence style table in which runs of consecutive matches
/// are weighted by <see cref="ConsecutiveMeasure"/>.
/// </summary>
public abstract class LCSBaseMatch
{
    /// <summary>
    /// Returns the total score awarded to a run of <paramref name="length"/>
    /// consecutive matching characters. The default is <c>length * length</c>;
    /// subclasses may override it to weight runs differently.
    /// </summary>
    /// <param name="length">Length of the consecutive run.</param>
    /// <returns>The score for a run of that length.</returns>
    protected virtual int ConsecutiveMeasure(int length)
    {
        return length * length;
    }

    /// <summary>
    /// Computes the match score of two character arrays.
    /// </summary>
    /// <param name="list1">First character sequence.</param>
    /// <param name="list2">Second character sequence.</param>
    /// <returns>The final match score; 0 when either sequence is empty.</returns>
    /// <exception cref="System.ArgumentNullException">
    /// <paramref name="list1"/> or <paramref name="list2"/> is null.
    /// </exception>
    public virtual int GetMatchScoreOfLCS(char[] list1, char[] list2)
    {
        int[,] lcs;
        BackTracking[,] backTracer;
        return GetMatchScoreOfLCS(list1, list2, out lcs, out backTracer);
    }

    /// <summary>
    /// Fills the score table and the back-tracking table for two character
    /// arrays and returns the score stored in the last cell.
    /// </summary>
    /// <param name="list1">First character sequence (table rows).</param>
    /// <param name="list2">Second character sequence (table columns).</param>
    /// <param name="lcs">
    /// Receives the score table, sized <c>[list1.Length, list2.Length]</c>.
    /// </param>
    /// <param name="backTracer">
    /// Receives the direction recorded for each cell, same size as <paramref name="lcs"/>.
    /// </param>
    /// <returns>The final match score; 0 when either sequence is empty.</returns>
    /// <exception cref="System.ArgumentNullException">
    /// <paramref name="list1"/> or <paramref name="list2"/> is null.
    /// </exception>
    protected int GetMatchScoreOfLCS(char[] list1, char[] list2, out int[,] lcs, out BackTracking[,] backTracer)
    {
        if (list1 == null)
        {
            throw new System.ArgumentNullException("list1");
        }

        if (list2 == null)
        {
            throw new System.ArgumentNullException("list2");
        }

        int m = list1.Length;
        int n = list2.Length;

        lcs = new int[m, n];                  // best score ending at or before (i, j); zero-initialised
        backTracer = new BackTracking[m, n];  // direction to follow when tracing back

        // With an empty sequence there is no cell to read: the tables stay
        // empty and the score is 0 (previously this indexed [-1, -1]).
        if (m == 0 || n == 0)
        {
            return 0;
        }

        int[,] w = new int[m, n];             // length of the consecutive run ending at (i, j)

        // Border defaults: first column points up, first row points left.
        // Cell (0, 0) ends up as LEFT because the row pass runs last.
        for (int i = 0; i < m; ++i)
        {
            backTracer[i, 0] = BackTracking.UP;
        }

        for (int j = 0; j < n; ++j)
        {
            backTracer[0, j] = BackTracking.LEFT;
        }

        for (int i = 0; i < m; i++)
        {
            for (int j = 0; j < n; j++)
            {
                if (list1[i] == list2[j])
                {
                    int k = 0;     // run length ending at the diagonal neighbour
                    int prev = 0;  // score at the diagonal neighbour
                    if (i > 0 && j > 0)
                    {
                        k = w[i - 1, j - 1];
                        prev = lcs[i - 1, j - 1];
                    }

                    // Extending a run from k to k + 1 adds the difference of the
                    // run scores instead of the constant 1 used by basic LCS.
                    lcs[i, j] = prev + ConsecutiveMeasure(k + 1) - ConsecutiveMeasure(k);
                    backTracer[i, j] = BackTracking.UP_AND_LEFT;
                    w[i, j] = k + 1;
                }

                // A strictly better score from above or from the left wins and
                // breaks the consecutive run.
                if (i > 0 && lcs[i - 1, j] > lcs[i, j])
                {
                    lcs[i, j] = lcs[i - 1, j];
                    backTracer[i, j] = BackTracking.UP;
                    w[i, j] = 0;
                }

                if (j > 0 && lcs[i, j - 1] > lcs[i, j])
                {
                    lcs[i, j] = lcs[i, j - 1];
                    backTracer[i, j] = BackTracking.LEFT;
                    w[i, j] = 0;
                }
            }
        }

        return lcs[m - 1, n - 1];
    }
}

/// <summary>
/// String front end for <see cref="LCSBaseMatch"/> that reconstructs the
/// matched characters from the back-tracking table.
/// </summary>
public class LCSMatchForString : LCSBaseMatch
{
    /// <summary>
    /// Returns the characters common to both strings along the path chosen by
    /// the scoring table. The characters appear in order but need not be
    /// contiguous in either input (e.g. "abcd" and "abxd" give "abd").
    /// </summary>
    /// <param name="s1">First string.</param>
    /// <param name="s2">Second string.</param>
    /// <returns>The matched characters; an empty string when nothing matches or either input is empty.</returns>
    /// <exception cref="System.ArgumentNullException">
    /// <paramref name="s1"/> or <paramref name="s2"/> is null.
    /// </exception>
    public string LCS(string s1, string s2)
    {
        if (s1 == null)
        {
            throw new System.ArgumentNullException("s1");
        }

        if (s2 == null)
        {
            throw new System.ArgumentNullException("s2");
        }

        char[] list1 = s1.ToCharArray();
        char[] list2 = s2.ToCharArray();

        int[,] lcs;
        BackTracking[,] backTracer;
        GetMatchScoreOfLCS(list1, list2, out lcs, out backTracer);

        // Every diagonal step consumes one character of each input, so the
        // result can never be longer than the shorter input. The buffer is
        // filled from the back because the trace runs from the end to the start.
        char[] buffer = new char[list1.Length < list2.Length ? list1.Length : list2.Length];
        int start = buffer.Length;

        int i = list1.Length - 1;
        int j = list2.Length - 1;
        while (i >= 0 && j >= 0)
        {
            BackTracking step = backTracer[i, j];
            if (step == BackTracking.NEITHER)
            {
                break;
            }

            if (step == BackTracking.UP_AND_LEFT)
            {
                buffer[--start] = list1[i];
                i--;
                j--;
            }
            else if (step == BackTracking.UP)
            {
                i--;
            }
            else if (step == BackTracking.LEFT)
            {
                j--;
            }
        }

        return new string(buffer, start, buffer.Length - start);
    }
}