KMP字符串匹配算法
子串的定位操作通常称为串的模式匹配。以下算法中:S—主串,T—子串(模式串)字符数组存储从下标 1 开始,String[0] 记录字符数组长度。
#include <iostream> #include <stdio.h> #include <string> using namespace std; #define MAXSTRLEN 255 typedef unsigned char SString[MAXSTRLEN + 1]; int Index(SString S, SString T, int pos); //普通匹配算法 int Index_KMP(SString S, SString T, int pos); void get_next(SString T, int next[]); //获取下次比较的模式串的下标 void get_nextval(SString T, int nextval[]); //改进的模式串下标获取方法 int main () { char c, s[MAXSTRLEN], t[MAXSTRLEN]; SString S, T, next; int n, i, pos; do { cout<<"enter a string:"; cin >> s; for (i = 0; s[i] != '\0'; i++) S[i + 1] = s[i]; S[0] = i; cout<<"enter a test string:"; cin >> t; for (i = 0; t[i] != '\0'; i++) T[i + 1] = t[i]; T[0] = i; cout<<"输入从主串查找的位置n:"; cin >> n; pos = Index_KMP(S, T, n); cout<<pos<<endl; cout<<"是否继续(y/n):"; cin >> c; }while(c == 'y' || c == 'Y'); return 0; }
KMP核心处理函数:对于j = 0 || S[i] = T[j]时,比较主串和子串的下一个字符;否则,主串待比较下标 i 不变,获取子串的待比较下标 j = next[j]。
int Index_KMP(SString S, SString T, int pos) { int next[MAXSTRLEN]; int i = pos, j = 1; get_next(T, next); // get_nextval(T, next); while(i <= S[0] && j <= T[0]) { if (j == 0 || S[i] == T[j]) { ++ i; ++ j;} else j = next[j]; } if (j > T[0]) return i - T[0]; else return 0; }
模式串T中next函数值的获取:
void get_next(SString T, int next[]) { int i = 1; next[1] = 0; int j = 0; while(i < T[0]) { if ( j == 0 || T[i] == T[j]) { ++i; ++j; next[i] = j;} else j = next[j]; } }
改进的nextval函数值的获取:
void get_nextval(SString T, int nextval[]) { int i = 1; nextval[1] = 0; int j = 0; while (i < T[0]) { if (j == 0 || T[i] == T[j]) { ++ i; ++ j; if (T[i] != T[j]) nextval[i] = j; else nextval[i] = nextval[j]; } else j = nextval[j]; } }