【模板】BM字符串匹配

据说效率是 KMP 的 \(3 \sim 4\) 倍。

主要利用坏字符和好后缀进行跳转来避免过多的匹配。

这篇博客讲得很好,推荐大家看看。

#include <iostream>
#include <vector>
#include <string>
// Alphabet size: the bad-character table has one slot per possible byte value.
const int size_of_cs = 256;
// bad_char[c] is the index of the last occurrence of byte c in the current
// pattern, or -1 when c does not occur in it.
std :: vector<int> bad_char;
// suffix[k] is the start index of the rightmost substring, ending before the
// pattern's last character, that equals the pattern's suffix of length k;
// -1 when there is no such substring.
std :: vector<int> suffix;
// prefix[k] is true when the pattern's suffix of length k is also its prefix.
std :: vector<bool> prefix;

/// Rebuilds the bad-character table for `pattern`.
/// Bytes are indexed as unsigned char so that values >= 0x80 cannot become
/// negative indices on platforms where plain char is signed.
void get_bad_char(const std :: string &pattern) {
	bad_char.assign(size_of_cs,-1);
	const int length = static_cast<int>(pattern.size());
	for(int i = 0;i < length;++i) 
		bad_char[static_cast<unsigned char>(pattern[i])] = i;
}

/// Rebuilds the good-suffix tables (`suffix` and `prefix`) for `pattern`.
/// Both tables are reset to exactly pattern.size() entries on every call, so
/// nothing from a previously processed pattern survives.
/// The nested loops make this quadratic in the pattern length in the worst case.
void get_good_suf(const std :: string &pattern) {
	// Work with a signed length: size()-1 on an empty string would wrap around.
	const int length = static_cast<int>(pattern.size());
	suffix.assign(length,-1);
	prefix.assign(length,false);
	for(int i = 0;i < length-1;++i) {
		// Extend the common suffix of pattern[0..i] and the whole pattern.
		int j = i;
		int k = 0;
		while(j >= 0&&pattern[j] == pattern[length-1-k]) {
			--j;
			++k;
			// Later values of i overwrite earlier ones, keeping the rightmost start.
			suffix[k] = j+1;
		}
		// The match reached index 0: this suffix of length k is also a prefix.
		if(j < 0) 
			prefix[k] = true;
	}
}

/// Returns the shift given by the good-suffix rule.
/// @param pos    index in the pattern where the mismatch happened;
///               requires 0 <= pos < length-1 (at least one character matched).
/// @param length length of the pattern the tables were built for.
/// @return the number of positions the pattern may be moved forward.
int move_by_good_suf(int pos,int length) {
	const int k = length-1-pos;	// length of the matched (good) suffix
	if(suffix[k] != -1) 
		return pos-suffix[k]+1;
	// No other occurrence of the whole good suffix: look for the longest
	// proper suffix of it that is also a prefix of the pattern.
	for(int r = pos+2;r < length;++r) 
		if(prefix[length-r]) 
			return r;
	return length;
}

/// Finds the first occurrence of `pattern` in `text` with the Boyer-Moore
/// algorithm (bad-character rule combined with the good-suffix rule).
/// Rebuilds the global tables `bad_char`, `suffix` and `prefix` as a side effect.
/// @return the index of the first match, 0 for an empty pattern, or -1 when
///         the pattern does not occur (including when it is longer than the text).
/// Lengths are handled as int, matching the return type.
int BM(const std :: string &text,const std :: string &pattern) {
	if(pattern.empty()) 
		return 0;
	const int text_length = static_cast<int>(text.size());
	const int pattern_length = static_cast<int>(pattern.size());
	// Signed comparison: the unsigned difference of the sizes would wrap around.
	if(pattern_length > text_length) 
		return -1;
	get_bad_char(pattern);
	get_good_suf(pattern);
	for(int i = 0;i <= text_length-pattern_length;) {
		// Compare the window right to left.
		int j = pattern_length-1;
		while(j >= 0&&text[i+j] == pattern[j]) 
			--j;
		if(j < 0) 
			return i;
		// Bad-character shift; may be non-positive when at least one character matched.
		const int x = j-bad_char[static_cast<unsigned char>(text[i+j])];
		// Good-suffix shift; only defined when at least one character matched.
		int y = 0;
		if(j < pattern_length-1) 
			y = move_by_good_suf(j,pattern_length);
		i += x > y ? x : y;
	}
	return -1;
}
// Input buffers: the text to search in and the pattern to look for.
std :: string text, pattern;
// Reads two whitespace-separated tokens from standard input and prints the
// value returned by BM for them (no trailing newline).
int main() {
	std :: cin >> text;
	std :: cin >> pattern;
	const int position = BM(text,pattern);
	std :: cout << position;
	return 0;
}

其实写了一版传统的 char* 的。

打炸了,再调罢。

#include <iostream>
#include <cstring>
// Alphabet size: the bad-character table has one slot per possible byte value.
const int size_of_cs = 256;
// Capacity of the good-suffix tables (and of the I/O buffers declared below).
const int max_length_of_pattern = 131026;
// bad_char[c] is the index of the last occurrence of byte c in the current
// pattern, or -1 when c does not occur in it.
int bad_char[size_of_cs];
// suffix[k] is the start index of the rightmost substring, ending before the
// pattern's last character, that equals the pattern's suffix of length k;
// -1 when there is no such substring.
int suffix[max_length_of_pattern];
// prefix[k] is true when the pattern's suffix of length k is also its prefix.
bool prefix[max_length_of_pattern];

/// Rebuilds the bad-character table for the pattern [begin, end).
/// Bytes are indexed as unsigned char so that values >= 0x80 cannot become
/// negative indices on platforms where plain char is signed.
void get_bad_char(const char *begin,const char *end) {
	// Filling every byte with 0xFF yields -1 in each int slot.
	std :: memset(bad_char,-1,sizeof(bad_char));
	const int length = static_cast<int>(end-begin);
	for(int i = 0;i < length;++i) 
		bad_char[static_cast<unsigned char>(begin[i])] = i;
}

/// Rebuilds the good-suffix tables (`suffix` and `prefix`) for the pattern
/// [begin, end). Only the first (end - begin) entries of each table are reset.
/// When the pattern is empty or longer than max_length_of_pattern the tables
/// are left untouched, so callers must not rely on them for such a pattern.
/// The nested loops make this quadratic in the pattern length in the worst case.
void get_good_suf(const char *begin,const char *end) {
	const int length = static_cast<int>(end-begin);
	// Refuse sizes that would make the memset calls below run out of bounds.
	if(length <= 0||length > max_length_of_pattern) 
		return;
	std :: memset(suffix,-1,length*sizeof(int));
	std :: memset(prefix,false,length*sizeof(bool));
	for(int i = 0;i < length-1;++i) {
		// Extend the common suffix of pattern[0..i] and the whole pattern.
		int j = i;
		int k = 0;
		while(j >= 0&&begin[j] == begin[length-1-k]) {
			--j;
			++k;
			// Later values of i overwrite earlier ones, keeping the rightmost start.
			suffix[k] = j+1;
		}
		// The match reached index 0: this suffix of length k is also a prefix.
		if(j < 0) 
			prefix[k] = true;
	}
}

/// Returns the shift given by the good-suffix rule.
/// @param pos    index in the pattern where the mismatch happened;
///               requires 0 <= pos < length-1 (at least one character matched).
/// @param length length of the pattern the tables were built for.
/// @return the number of positions the pattern may be moved forward.
int move_by_good_suffix(int pos,int length) {
	const int k = length-1-pos;	// length of the matched (good) suffix
	if(suffix[k] != -1) 
		return pos-suffix[k]+1;
	// No other occurrence of the whole good suffix: look for the longest
	// proper suffix of it that is also a prefix of the pattern.
	for(int r = pos+2;r < length;++r) 
		if(prefix[length-r]) 
			return r;
	return length;
}

/// Finds the first occurrence of the pattern [pattern_begin, pattern_end) in
/// the text [text_begin, text_end) with the Boyer-Moore algorithm.
/// Rebuilds the global tables as a side effect.
/// A pattern longer than max_length_of_pattern does not fit the good-suffix
/// tables; it is searched with the bad-character rule only.
/// @return the offset of the first match from text_begin, 0 for an empty
///         pattern, or -1 when the pattern does not occur.
int BM(const char *text_begin,const char *text_end,const char *pattern_begin,const char *pattern_end) {
	const int size_of_text = static_cast<int>(text_end-text_begin);
	const int size_of_pattern = static_cast<int>(pattern_end-pattern_begin);
	if(size_of_pattern <= 0) 
		return 0;
	get_bad_char(pattern_begin,pattern_end);
	const bool use_good_suffix = size_of_pattern <= max_length_of_pattern;
	if(use_good_suffix) 
		get_good_suf(pattern_begin,pattern_end);
	for(int i = 0;i <= size_of_text-size_of_pattern;) {
		// Compare the window right to left.
		int j = size_of_pattern-1;
		while(j >= 0&&text_begin[i+j] == pattern_begin[j]) 
			--j;
		if(j < 0) 
			return i;
		// Moving by one position is always safe; the rules can only enlarge it.
		int shift = 1;
		const int x = j-bad_char[static_cast<unsigned char>(text_begin[i+j])];
		if(x > shift) 
			shift = x;
		// The good-suffix rule needs at least one matched character.
		if(use_good_suffix&&j < size_of_pattern-1) {
			const int y = move_by_good_suffix(j,size_of_pattern);
			if(y > shift) 
				shift = y;
		}
		i += shift;
	}
	return -1;
}
// Input buffers: the text to search in and the pattern to look for.
char text[max_length_of_pattern];
char pattern[max_length_of_pattern];
// Reads two whitespace-separated tokens from standard input and prints the
// value returned by BM for them, followed by a newline.
int main() {
	// The field widths in the format string must stay one below the buffer
	// size so that scanf always has room for the terminating '\0'.
	static_assert(max_length_of_pattern == 131026,"update the scanf field widths together with the buffer size");
	// Bounded reads: an over-long token is cut at the buffer capacity instead
	// of overflowing it (the remainder is then consumed by the next conversion).
	// The buffers are zero-initialised globals, so a failed conversion simply
	// leaves an empty string behind.
	scanf("%131025s %131025s",text,pattern);
	printf("%d\n",BM(text,text+strlen(text),pattern,pattern+strlen(pattern)));
	return 0;
}

行了,改好了。

posted @ 2022-09-29 21:29  bikuhiku  阅读(21)  评论(0)  编辑  收藏  举报