专题:AC自动机
搜索关键词
题意:有多少单词在文本中出现过
题解:对单词结尾的位置记录一个cnt
在自动机上找到了匹配到i为止最深的结点j
那么所有的ne[j],ne[ne[j]]...也是可以匹配的
//
// Created by vv123 on 2022/8/30.
//
#include <bits/stdc++.h>
using namespace std;
// N sizes the per-node arrays (trie rows, counters, failure links); M sizes
// the shared input buffer.
// NOTE(review): both presumably match the original problem's limits — confirm.
const int N = 5e5 + 10, M = 1e6 + 10;
// n: number of patterns; tr[p][c]: child / automaton transition of node p on
// letter 'a' + c; cnt[p]: patterns ending at node p (consumed by solve while
// matching); idx: id of the last allocated node (0 is the root);
// ne[p]: failure link of node p.
int n, tr[N][26], cnt[N], idx, ne[N];
// Input buffer reused for every pattern and then for the text.
char str[M];
//trie
// Adds the lowercase word currently stored in `str` to the trie, creating
// missing nodes on the way, and increments the end-of-word counter of the
// node where the word terminates.
void insert() {
    int node = 0;
    for (const char* s = str; *s != '\0'; ++s) {
        int& child = tr[node][*s - 'a'];
        if (child == 0) child = ++idx;  // allocate a fresh node id
        node = child;
    }
    ++cnt[node];
}
void build() {
queue<int> q;
for (int i = 0; i < 26; i++) {
if (tr[0][i]) q.push(tr[0][i]);
}
while (!q.empty()) {
int u = q.front(); q.pop();
for (int i = 0; i < 26; i++) {
int v = tr[u][i];
if (!v) { tr[u][i] = tr[ne[u]][i]; continue; }
int j = ne[u];// in KMP: j = next[i - 1]
ne[v] = tr[j][i];
q.push(v);
}
}
}
// Handles one test case: reads n patterns and a text from standard input and
// prints how many of the patterns occur in the text (each inserted pattern is
// counted once, duplicates included).
void solve() {
    // Only nodes 0..idx were written by the previous test case, so clearing
    // those rows is enough; wiping the whole tables on every case costs tens
    // of megabytes of memset per case.
    const size_t used = static_cast<size_t>(idx) + 1;
    memset(tr, 0, used * sizeof tr[0]);
    memset(cnt, 0, used * sizeof cnt[0]);
    memset(ne, 0, used * sizeof ne[0]);
    idx = 0;

    cin >> n;
    for (int i = 1; i <= n; i++) {
        cin >> str;
        insert();
    }
    build();

    cin >> str;
    int res = 0;
    for (int i = 0, j = 0; str[i]; i++) {
        // j is the deepest automaton node matching a suffix of str[0..i];
        // every node on its failure chain ne[j], ne[ne[j]], ... matches too.
        j = tr[j][str[i] - 'a'];
        // cnt[p] == -1 marks a node whose whole failure chain has already
        // been harvested, so the walk can stop there. This keeps the total
        // work linear in (text length + node count).
        for (int p = j; p && cnt[p] != -1; p = ne[p]) {
            res += cnt[p];
            cnt[p] = -1;
        }
    }
    cout << res << "\n";
}
// Entry point: reads the number of test cases and runs solve() once per case.
int main() {
    ios::sync_with_stdio(false);
    cin.tie(0);
    int cases;
    cin >> cases;
    while (cases-- != 0) {
        solve();
    }
    return 0;
}
单词
题意:每个串在所有串中出现了多少次
所有的ne[x]向x连边,将构成一个树形结构
考虑x的答案会累加给ne[x],按照BFS序的倒序进行树形DP即可
//
// Created by vv123 on 2022/8/30.
//
#include <bits/stdc++.h>
using namespace std;
// Base size for the arrays below.
// NOTE(review): presumably the total input length limit — confirm.
const int N = 1e6 + 10;
// n: number of words; tr[p][c]: child / automaton transition of node p on
// letter 'a' + c; idx: id of the last allocated node (0 is the root);
// f[p]: how many inserted words pass through node p, later accumulated along
// failure links; ne[p]: failure link of node p; pos[x]: node where the x-th
// word ends.
int n, tr[N << 1][26], idx, f[N << 1], ne[N << 1], pos[N];
// Input buffer reused for every word.
char str[N];
//trie
// Adds the lowercase word in `str` to the trie as word number x. Every node
// on the word's path gets its pass-through counter f[] incremented, and the
// final node is remembered in pos[x].
void insert(int x) {
    int node = 0;
    for (const char* s = str; *s != '\0'; ++s) {
        int& child = tr[node][*s - 'a'];
        if (child == 0) child = ++idx;  // allocate a fresh node id
        node = child;
        ++f[node];
    }
    pos[x] = node;
}
vector<int> vec;
void build() {
queue<int> q;
for (int i = 0; i < 26; i++) {
if (tr[0][i]) q.push(tr[0][i]);
}
while (!q.empty()) {
int u = q.front(); q.pop(); vec.push_back(u);//idx个点
for (int i = 0; i < 26; i++) {
int v = tr[u][i];
if (!v) { tr[u][i] = tr[ne[u]][i]; continue; }
int j = ne[u];// in KMP: j = next[i - 1]
ne[v] = tr[j][i];
q.push(v);
}
}
}
int main() {
ios::sync_with_stdio(false); cin.tie(0);
cin >> n;
for (int i = 1; i <= n; i++) {
cin >> str;
insert(i);
}
build();
for (int i = idx - 1; i >= 0; i--) f[ne[vec[i]]] += f[vec[i]];
for (int i = 1; i <= n; i++) cout << f[pos[i]] << "\n";
return 0;
}
JSOI2007 文本生成器
题意:
给出n个串,求有多少长度为m的串至少包含上述一个串
题解:
状态机模型DP:先用dp[i][j]统计长度为i、停在自动机状态j且从未经过任何被标记(包含模式串)结点的方案数,再用总方案数26^m减去这些"不包含任何模式串"的方案数,即为答案(对10007取模)
//
// Created by vv123 on 2022/8/30.
//
#include <bits/stdc++.h>
using namespace std;
// N sizes the per-node arrays; mod is the modulus for all counts.
// NOTE(review): N presumably exceeds the real node limit — confirm.
const int N = 2e5 + 10, mod = 10007;
// n: number of patterns; m: length of the generated text;
// tr[p][c]: child / automaton transition of node p on letter 'A' + c;
// idx: id of the last allocated node (0 is the root); ne[p]: failure link;
// mk[p]: 1 if a pattern ends at p or anywhere on p's failure chain (after
// build); dp[i][j]: number of length-i strings ending in state j without ever
// entering a marked state, modulo mod.
int n, m, tr[N][26], idx, ne[N], mk[N], dp[110][N];
// Input buffer reused for every pattern.
char str[N];
// Modular exponentiation by repeated squaring: returns a^b mod p.
//   a: base, b: exponent (expected non-negative), p: modulus (non-zero).
// Products are formed in 64-bit arithmetic so the result is correct for any
// modulus that fits in an int. The accumulator starts at 1 % p so that p == 1
// yields 0 even when b == 0.
int pow(int a, int b, int p) {
    long long base = a % p;
    long long res = 1 % p;
    for (; b; b >>= 1) {
        if (b & 1) res = res * base % p;
        base = base * base % p;
    }
    return static_cast<int>(res);
}
//trie
// Adds the uppercase word currently stored in `str` to the trie and marks the
// node where it ends as a pattern end.
void insert() {
    int node = 0;
    for (const char* s = str; *s != '\0'; ++s) {
        int& child = tr[node][*s - 'A'];
        if (child == 0) child = ++idx;  // allocate a fresh node id
        node = child;
    }
    mk[node] = 1;
}
void build() {
queue<int> q;
for (int i = 0; i < 26; i++) {
if (tr[0][i]) q.push(tr[0][i]);
}
while (!q.empty()) {
int u = q.front(); q.pop();
mk[u] |= mk[ne[u]];
for (int i = 0; i < 26; i++) {
int v = tr[u][i];
if (!v) { tr[u][i] = tr[ne[u]][i]; continue; }
int j = ne[u];// in KMP: j = next[i - 1]
ne[v] = tr[j][i];
q.push(v);
}
}
}
// Entry point: reads n patterns and the length m, then prints (mod 10007) the
// number of length-m uppercase strings that contain at least one pattern.
// This is computed as 26^m minus the strings that avoid every pattern.
int main() {
    ios::sync_with_stdio(false);
    cin.tie(0);
    cin >> n >> m;
    for (int k = 1; k <= n; ++k) {
        cin >> str;
        insert();
    }
    build();

    // dp[len][state]: strings of length `len` that end in `state` and never
    // entered a marked state.
    dp[0][0] = 1;
    for (int len = 0; len < m; ++len) {
        for (int state = 0; state <= idx; ++state) {
            const int ways = dp[len][state];
            for (int c = 0; c < 26; ++c) {
                const int nxt = tr[state][c];
                if (mk[nxt]) continue;  // would complete a pattern
                dp[len + 1][nxt] = (dp[len + 1][nxt] + ways) % mod;
            }
        }
    }

    int res = pow(26, m, mod);
    for (int state = 0; state <= idx; ++state) {
        res = (res - dp[m][state] % mod + mod) % mod;
    }
    cout << res << "\n";
    return 0;
}
acwing1052设计密码是n=1情形下的相似问题,可以使用KMP解决
//
// Created by vv123 on 2022/8/30.
//
#include <bits/stdc++.h>
using namespace std;
// N sizes the arrays below; mod is the modulus for all counts.
// NOTE(review): N presumably matches the problem's length limit — confirm.
const int N = 55, mod = 1e9 + 7;
// n: length of the strings being counted; m: declared here, though main keeps
// the pattern length in a local of its own; ne[i]: KMP prefix-function value
// for the 1-indexed pattern prefix of length i; f[i][j]: number of length-i
// strings whose current match length against the pattern is j.
int n, m, ne[N], f[N][N];
// The pattern, stored 1-indexed (read into str + 1).
char str[N];
// Computes the KMP prefix function of the 1-indexed pattern p[1..n] into the
// global ne[]: ne[i] is the length of the longest proper prefix of p[1..i]
// that is also a suffix of it. ne[1] is left at its existing value (0 for the
// zero-initialised global).
//   p: pattern buffer whose first character is at p[1]
//   n: pattern length
// The comparison uses the parameter `p` throughout. The earlier version mixed
// in the global `str`, which was only correct when the caller passed `str`.
void getnext(char* p, int n) {
    for (int i = 2, j = 0; i <= n; i++) {
        while (j && p[i] != p[j + 1]) j = ne[j];
        if (p[i] == p[j + 1]) j++;
        ne[i] = j;
    }
}
// Entry point: reads the target length n and a pattern, then prints (mod
// 1e9+7) how many lowercase strings of length n do not contain the pattern.
int main() {
    cin >> n >> str + 1;
    int patLen = strlen(str + 1);
    getnext(str, patLen);

    // f[len][matched]: strings of length `len` whose longest suffix that is
    // also a pattern prefix has length `matched`.
    f[0][0] = 1;
    for (int len = 0; len < n; ++len) {
        for (int matched = 0; matched <= patLen; ++matched) {
            for (char ch = 'a'; ch <= 'z'; ++ch) {
                // Standard KMP step: extend the current match with ch.
                int k = matched;
                while (k != 0 && str[k + 1] != ch) k = ne[k];
                if (str[k + 1] == ch) ++k;
                if (k >= patLen) continue;  // full match is forbidden
                f[len + 1][k] = (f[len + 1][k] + f[len][matched]) % mod;
            }
        }
    }

    int res = 0;
    for (int matched = 0; matched < patLen; ++matched) {
        res = (res + f[n][matched]) % mod;
    }
    cout << res << "\n";
    return 0;
}
修复DNA
题意:求主串至少需要修改多少字符使其不包含任意模式串
题解:仍是自动机上dp
设f[i,j]表示长度为i、匹配到状态j最少需要修改的字符数量,初始化f[0][0]=0,其他为inf,容易知道
for (int k = 0; k < 4; k++) {
int p = tr[j][k];
if (!dar[p]) f[i + 1][p] = min(f[i + 1][p], f[i][j] + (get(str[i + 1]) != k));
}
其中dar[p]是不能到达的状态,注意需要在建立自动机时进行dar[v] |= dar[ne[v]]
对所有f[m][j]取最小值即可;若最小值仍为inf,说明无论如何修改都无法避开所有模式串,输出-1。
//
// Created by vv123 on 2022/8/31.
//
#include <bits/stdc++.h>
using namespace std;
// N sizes every array below.
// NOTE(review): presumably covers both the node count and the text length
// limits of the original problem — confirm.
const int N = 1010;
// n: number of patterns; m: text length; tr[p][c]: child / automaton
// transition of node p on nucleotide code c (see get); dar[p]: 1 if state p
// must be avoided (a pattern ends at p or on its failure chain);
// ne[p]: failure link; idx: id of the last allocated node (0 is the root);
// f[i][j]: minimum number of changed characters so that the first i text
// characters lead to state j; Case: running test-case number for the output.
int n, m, tr[N][4], dar[N], ne[N], idx, f[N][N], Case;
// Input buffer: patterns are read 0-indexed, the text 1-indexed.
char str[N];
// Maps a nucleotide letter to its alphabet index: 'A' -> 0, 'T' -> 1,
// 'G' -> 2, and anything else (normally 'C') -> 3.
int get(char c) {
    switch (c) {
        case 'A': return 0;
        case 'T': return 1;
        case 'G': return 2;
        default:  return 3;
    }
}
//trie
// Adds the DNA pattern currently stored in `str` to the trie and flags the
// node where it ends as a forbidden state.
void insert() {
    int node = 0;
    for (const char* s = str; *s != '\0'; ++s) {
        int& child = tr[node][get(*s)];
        if (child == 0) child = ++idx;  // allocate a fresh node id
        node = child;
    }
    dar[node] = 1;
}
void build() {
queue<int> q;
for (int i = 0; i < 4; i++) {
if (tr[0][i]) q.push(tr[0][i]);
}
while (!q.empty()) {
int u = q.front(); q.pop();
for (int i = 0; i < 4; i++) {
int v = tr[u][i];
if (!v) { tr[u][i] = tr[ne[u]][i]; continue; }
int j = ne[u];// in KMP: j = next[i - 1]
ne[v] = tr[j][i];
dar[v] |= dar[ne[v]];
q.push(v);
}
}
}
// Resets the automaton state (node counter, transitions, failure links and
// forbidden flags) before a new test case.
inline void init() {
    idx = 0;
    memset(ne, 0, sizeof(ne));
    memset(dar, 0, sizeof(dar));
    memset(tr, 0, sizeof(tr));
}
// Handles one test case (n has already been read by the caller): reads the n
// patterns and the text, then prints the minimum number of characters that
// must be changed so the text contains no pattern, or -1 if that is
// impossible.
inline void solve() {
    for (int k = 1; k <= n; ++k) {
        cin >> str;
        insert();
    }
    build();

    cin >> str + 1;  // text is stored 1-indexed
    m = strlen(str + 1);

    const int INF = 0x3f3f3f3f;
    memset(f, 0x3f, sizeof f);
    f[0][0] = 0;
    for (int pos = 1; pos <= m; ++pos) {
        const int want = get(str[pos]);  // original character at this position
        for (int state = 0; state <= idx; ++state) {
            const int base = f[pos - 1][state];
            for (int k = 0; k < 4; ++k) {
                const int nxt = tr[state][k];
                if (dar[nxt]) continue;  // never step into a forbidden state
                // Writing k here costs 1 unless it is the original character.
                const int cost = base + (want != k ? 1 : 0);
                if (cost < f[pos][nxt]) f[pos][nxt] = cost;
            }
        }
    }

    int res = INF;
    for (int state = 0; state <= idx; ++state) {
        res = min(res, f[m][state]);
    }
    if (res == INF) res = -1;
    printf("Case %d: %d\n", ++Case, res);
}
// Entry point: processes test cases until the pattern count is 0 or the input
// ends.
int main() {
    for (;;) {
        if (!(cin >> n) || n == 0) break;
        init();
        solve();
    }
    return 0;
}

浙公网安备 33010602011771号