【模板】BM字符串匹配
据说效率是 KMP
的 \(3 \sim 4\) 倍。
主要利用坏字符和好后缀进行跳转来避免过多的匹配。
这篇博客讲的很好,推荐大家看看。
#include <iostream>
#include <vector>
#include <string>
const int size_of_cs = 256;
std :: vector<int> bad_char;
std :: vector<int> suffix;
std :: vector<bool> prefix;
void get_bad_char(std :: string &pattern) {
bad_char.clear();
for(int i = 0;i < size_of_cs;++i)
bad_char.emplace_back(-1);
for(int i = 0;i < pattern.size();++i)
bad_char[(int)pattern[i]] = i;
}
void get_good_suf(std :: string &pattern) {
for(int i = 0;i < pattern.size();++i) {
suffix.emplace_back(-1);
prefix.emplace_back(false);
}
for(int i = 0, j, k;i < pattern.size()-1;++i) {
j = i;
k = 0;
while(j >= 0&&pattern[j] == pattern[pattern.size()-1-k])
suffix[++k] = (--j)+1;
if(j < 0)
prefix[k] = true;
}
}
int move_by_good_suf(int pos,int length) {
int k = length-1-pos;
if(suffix[k] != -1)
return pos-suffix[k]+1;
for(int i = pos+2;i < length;++i)
if(prefix[length = i])
return i;
return length;
}
int BM(std :: string &text,std :: string &pattern) {
if(pattern.empty())
return 0;
get_bad_char(pattern);
get_good_suf(pattern);
for(int i = 0, x, y;i <= text.size()-pattern.size();i += std :: max(x,y)) {
int j;
for(j = pattern.size()-1;j >= 0;--j)
if(text[i+j] != pattern[j])
break;
if(j < 0)
return i;
x = j-bad_char[(int)text[i+j]];
y = 0;
if(j < pattern.size()-1)
y = move_by_good_suf(j,pattern.size());
}
return -1;
}
std :: string text, pattern;
int main() {
std :: cin >> text >> pattern;
std :: cout << BM(text,pattern);
return 0;
}
其实写了一版传统的 char*
的。
打炸了,再调罢。
#include <iostream>
#include <cstring>
const int size_of_cs = 256;
const int max_length_of_pattern = 131026;
int bad_char[size_of_cs];
int suffix[max_length_of_pattern];
bool prefix[max_length_of_pattern];
void get_bad_char(char *begin,char *end) {
memset(bad_char,-1,sizeof(bad_char));
for(int i = 0;begin+i != end;++i)
bad_char[(int)(*(begin+i))] = i;
}
void get_good_suf(char *begin,char *end) {
int length = (long long)end-(long long)begin;
memset(suffix,-1,length*sizeof(int));
memset(prefix,false,length*sizeof(bool));
for(int i = 0, j, k;i < length-1;++i) {
j = i;
k = 0;
while(j >= 0&&*(begin+j) == *(begin+(length-1-k))) {
--j;
++k;
suffix[k] = j+1;
}
if(j < 0)
prefix[k] = true;
}
}
int move_by_good_suffix(int pos,int length) {
int k = length-1-pos;
if(suffix[k] != -1)
return pos-suffix[k]+1;
for(int i = pos+2;i < length;++i)
if(prefix[length = i])
return i;
return length;
}
int BM(char *text_begin,char *text_end,char *pattern_begin,char *pattern_end) {
int size_of_text = (long long)text_end-(long long)text_begin;
int size_of_pattern = (long long)pattern_end-(long long)pattern_begin;
if(size_of_pattern <= 0)
return 0;
get_bad_char(pattern_begin,pattern_end);
get_good_suf(pattern_begin,pattern_end);
for(int i = 0, j, x, y;i <= size_of_text-size_of_pattern;i = i+std :: max(x,y)) {
for(j = size_of_pattern-1;j >= 0;--j)
if(*(text_begin+(i+j)) != *(pattern_begin+j))
break;
if(j < 0)
return i;
x = j-bad_char[(int)*(text_begin+(i+j))];
y = 0;
if(j < size_of_pattern-1)
y = move_by_good_suffix(j,size_of_pattern);
}
return -1;
}
char text[max_length_of_pattern];
char pattern[max_length_of_pattern];
int main() {
scanf("%s %s",text,pattern);
printf("%d\n",BM(text,text+strlen(text),pattern,pattern+strlen(pattern)));
return 0;
}
行了,改好了。