专题:AC自动机


搜索关键词

题意:有多少单词在文本中出现过
题解:对单词结尾的位置记录一个cnt
在自动机上找到了匹配到i为止最深的结点j
那么所有的ne[j],ne[ne[j]]...也是可以匹配的

//
// Created by vv123 on 2022/8/30.
//
#include <bits/stdc++.h>
using namespace std;

// N sizes the per-node arrays (trie nodes); M sizes the shared input buffer.
const int N = 5e5 + 10, M = 1e6 + 10;
// n: number of patterns in the current test case.
// tr: trie child table (node x letter); build() later fills missing entries
//     with fallback transitions.
// cnt: per node, how many inserted patterns end there.
// idx: id of the most recently allocated trie node (0 is the root).
// ne: failure link of each node.
int n, tr[N][26], cnt[N], idx, ne[N];
// Shared input buffer: holds each pattern while it is inserted, then the text.
char str[M];

//trie
// Inserts the pattern currently stored in the global buffer `str` into the trie.
// Missing children are allocated with ids taken from ++idx, and cnt[] of the
// final node is incremented, so identical patterns are counted separately.
// Characters are mapped with `- 'a'` into 26 columns (lowercase input expected).
void insert() {
    int node = 0;  // start at the root
    for (const char* s = str; *s; ++s) {
        int& child = tr[node][*s - 'a'];
        if (child == 0) child = ++idx;
        node = child;
    }
    ++cnt[node];
}

void build() {
    queue<int> q;
    for (int i = 0; i < 26; i++) {
        if (tr[0][i]) q.push(tr[0][i]);
    }
    while (!q.empty()) {
        int u = q.front(); q.pop();
        for (int i = 0; i < 26; i++) {
            int v = tr[u][i];
            if (!v) { tr[u][i] = tr[ne[u]][i]; continue; }
            int j = ne[u];// in KMP: j = next[i - 1]
            ne[v] = tr[j][i];
            q.push(v);
        }
    }
}

// Handles one test case: reads n patterns, builds the automaton, reads the
// text and prints how many inserted patterns occur in it (identical patterns
// are counted once each, every pattern at most once).
void solve() {
    // Only nodes 0..idx were written by the previous test case (insert()
    // allocates ids through ++idx and build() only touches queued nodes), so
    // clearing just those rows is equivalent to wiping the whole arrays and
    // avoids a full-size memset per test case.
    for (int i = 0; i <= idx; i++) {
        memset(tr[i], 0, sizeof tr[i]);
        cnt[i] = 0;
        ne[i] = 0;
    }
    idx = 0;
    cin >> n;
    for (int i = 1; i <= n; i++) {
        cin >> str;
        insert();
    }
    build();
    cin >> str;
    int res = 0;
    for (int i = 0, j = 0; str[i]; i++) {
        // j is the deepest automaton node matching a suffix of str[0..i].
        j = tr[j][str[i] - 'a'];
        // Every node on the failure chain ne[j], ne[ne[j]], ... also matches.
        // A node is marked with -1 once harvested; when a marked node is met,
        // the rest of its chain has been harvested as well, so the walk stops.
        for (int p = j; p && cnt[p] != -1; p = ne[p]) {
            res += cnt[p];
            cnt[p] = -1;
        }
    }
    cout << res << "\n";
}

// Entry point: reads the number of test cases and runs solve() once per case.
int main() {
    ios::sync_with_stdio(false);
    cin.tie(0);
    int T;
    cin >> T;
    for (; T != 0; --T) solve();
    return 0;
}

单词

题意:每个串在所有串中出现了多少次
所有的ne[x]向x连边,将构成一个树形结构
考虑x的答案会累加给ne[x],按照BFS序的倒序进行树形DP即可

//
// Created by vv123 on 2022/8/30.
//
#include <bits/stdc++.h>
using namespace std;

// N sizes the input buffer and pos[]; the per-node arrays use N << 1.
const int N = 1e6 + 10;
// n: number of words.
// tr: trie child table, completed into automaton transitions by build().
// idx: id of the most recently allocated node (0 is the root).
// f: per node, how many inserted words pass through it; main() then adds each
//    node's value onto its failure-link target.
// ne: failure links.
// pos[x]: node at which word x ends.
int n, tr[N << 1][26], idx, f[N << 1], ne[N << 1], pos[N];
// Input buffer for the word currently being read.
char str[N];

//trie
// Inserts the word stored in the global buffer `str` as word number x.
// Every node on the word's path gets f[] incremented, i.e. f[v] counts how
// many inserted words have v's string as a prefix. pos[x] remembers the node
// where the word ends.
void insert(int x) {
    int node = 0;  // root
    for (const char* s = str; *s != '\0'; ++s) {
        int& child = tr[node][*s - 'a'];
        if (!child) child = ++idx;
        node = child;
        ++f[node];
    }
    pos[x] = node;
}

// Nodes in the order build() pops them from its BFS queue (root excluded).
vector<int> vec;

void build() {
    queue<int> q;
    for (int i = 0; i < 26; i++) {
        if (tr[0][i]) q.push(tr[0][i]);
    }
    while (!q.empty()) {
        int u = q.front(); q.pop(); vec.push_back(u);//idx个点
        for (int i = 0; i < 26; i++) {
            int v = tr[u][i];
            if (!v) { tr[u][i] = tr[ne[u]][i]; continue; }
            int j = ne[u];// in KMP: j = next[i - 1]
            ne[v] = tr[j][i];
            q.push(v);
        }
    }
}

int main() {
    ios::sync_with_stdio(false); cin.tie(0);
    cin >> n;
    for (int i = 1; i <= n; i++) {
        cin >> str;
        insert(i);
    }
    build();
    for (int i = idx - 1; i >= 0; i--) f[ne[vec[i]]] += f[vec[i]];
    for (int i = 1; i <= n; i++) cout << f[pos[i]] << "\n";
    return 0;
}

JSOI2007 文本生成器

题意:
给出n个串,求有多少长度为m的串至少包含上述一个串
题解:
状态机模型DP:先在AC自动机上统计不包含任何模式串的串数(只允许走到未被标记的状态),再用 26^m 减去它,结果对 10007 取模

//
// Created by vv123 on 2022/8/30.
//
#include <bits/stdc++.h>
using namespace std;

// N sizes the per-node arrays and the input buffer; mod is the output modulus.
const int N = 2e5 + 10, mod = 10007;
// n: number of patterns; m: length of the generated strings.
// tr / idx / ne: trie transitions, last allocated node id, failure links.
// mk: 1 for nodes where a pattern ends, propagated along failure links in build().
// dp[i][j]: number of length-i strings that end in state j while only ever
//           visiting unmarked states (mod `mod`).
int n, m, tr[N][26], idx, ne[N], mk[N], dp[110][N];
// Input buffer for the pattern currently being read.
char str[N];


// Modular exponentiation by repeated squaring: returns a^b mod p in [0, p).
//   a: base (any int; it is reduced into [0, p) first)
//   b: exponent; values <= 0 are treated as 0, giving 1 % p
//   p: modulus, must be positive
// Products are formed in 64-bit arithmetic, so any positive int modulus is
// safe (int * int would overflow once p exceeds about 46341).
int pow(int a, int b, int p) {
    long long base = a % p;
    if (base < 0) base += p;
    long long res = 1 % p;  // yields 0 when p == 1
    for (; b > 0; b >>= 1) {
        if (b & 1) res = res * base % p;
        base = base * base % p;
    }
    return static_cast<int>(res);
}

//trie
// Inserts the pattern held in the global buffer `str` into the trie and marks
// its final node in mk[]. Letters are mapped with `- 'A'` into 26 columns
// (uppercase input expected).
void insert() {
    int node = 0;  // root
    for (const char* s = str; *s; ++s) {
        const int letter = *s - 'A';
        if (tr[node][letter] == 0) {
            tr[node][letter] = ++idx;
        }
        node = tr[node][letter];
    }
    mk[node] = 1;
}

void build() {
    queue<int> q;
    for (int i = 0; i < 26; i++) {
        if (tr[0][i]) q.push(tr[0][i]);
    }
    while (!q.empty()) {
        int u = q.front(); q.pop();
        mk[u] |= mk[ne[u]];
        for (int i = 0; i < 26; i++) {
            int v = tr[u][i];
            if (!v) { tr[u][i] = tr[ne[u]][i]; continue; }
            int j = ne[u];// in KMP: j = next[i - 1]
            ne[v] = tr[j][i];
            q.push(v);
        }
    }
}

int main() {
    ios::sync_with_stdio(false); cin.tie(0);
    cin >> n >> m;
    for (int i = 1; i <= n; i++) {
        cin >> str;
        insert();
    }
    build();
    dp[0][0] = 1;
    for (int i = 0; i <= m - 1; i++) {
        for (int j = 0; j <= idx; j++) {
            for (char c = 0; c < 26; c++) {
                if (!mk[tr[j][c]]) {
                    (dp[i + 1][tr[j][c]] += dp[i][j]) %= mod;
                }
            }
        }
    }
    int res = pow(26, m, mod);
    for (int i = 0; i <= idx; i++) res = (res - dp[m][i] % mod + mod) % mod;
    cout << res << "\n";
    return 0;
}

acwing1052设计密码是n=1情形下的相似问题,可以使用KMP解决

//
// Created by vv123 on 2022/8/30.
//
#include <bits/stdc++.h>
using namespace std;

// N sizes all arrays; mod is the output modulus.
const int N = 55, mod = 1e9 + 7;
// n: length of the strings being counted.
// m: unused at file scope -- main() declares its own local m that shadows it.
// ne: KMP failure table of the pattern (1-based).
// f[i][j]: number of length-i strings whose current match length is j.
int n, m, ne[N], f[N][N];
// The forbidden pattern, stored 1-based (read into str + 1).
char str[N];

// Computes the KMP failure table for the 1-based pattern p[1..n] into the
// global array ne[]: ne[i] is the length of the longest proper prefix of
// p[1..i] that is also a suffix of it.
//   p: pattern buffer, characters at indices 1..n
//   n: pattern length
// Both comparisons go through the parameter p, so the function works for any
// buffer and not only when it is handed the global `str`.
void getnext(char* p, int n) {
    for (int i = 2, j = 0; i <= n; i++) {
        // Fall back along the failure chain until p[i] can extend the match.
        while (j && p[i] != p[j + 1]) j = ne[j];
        if (p[i] == p[j + 1]) j++;
        ne[i] = j;
    }
}

// Entry point: counts the length-n lowercase strings that do not contain the
// pattern, with a DP over KMP match lengths, and prints the count modulo `mod`.
int main() {
    cin >> n >> str + 1;
    const int len = strlen(str + 1);
    getnext(str, len);

    // f[i][j]: number of length-i strings whose current match length is j.
    f[0][0] = 1;
    for (int i = 0; i < n; i++) {
        for (int j = 0; j <= len; j++) {
            for (char ch = 'a'; ch <= 'z'; ch++) {
                // Advance the KMP state j by the character ch.
                int u = j;
                while (u && ch != str[u + 1]) u = ne[u];
                if (ch == str[u + 1]) u++;
                if (u >= len) continue;  // a full match is forbidden
                f[i + 1][u] = (f[i + 1][u] + f[i][j]) % mod;
            }
        }
    }

    int res = 0;
    for (int j = 0; j < len; j++) {
        res = (res + f[n][j]) % mod;
    }
    cout << res << "\n";
    return 0;
}

修复DNA

题意:求主串至少需要修改多少字符使其不包含任意模式串
题解:仍是自动机上dp
设f[i,j]表示长度为i、匹配到状态j最少需要修改的字符数量,初始化f[0][0]=0,其他为inf,容易知道转移如下(枚举第i+1位改成的字符k):

for (int k = 0; k < 4; k++) {
    int p = tr[j][k];
    if (!dar[p]) f[i + 1][p] = min(f[i + 1][p], f[i][j] + (get(str[i + 1]) != k));
}

其中dar[p]是不能到达的状态,注意需要在建立自动机时进行dar[v] |= dar[ne[v]]
对所有f[m][j]取最小值即可。

//
// Created by vv123 on 2022/8/31.
//
#include <bits/stdc++.h>
using namespace std;

// N sizes every array: trie nodes, text buffer and DP rows.
const int N = 1010;
// n: number of patterns; m: length of the text.
// tr / ne / idx: trie transitions over 4 letters, failure links, last node id.
// dar: 1 for states that must not be entered (a pattern ends there or at a
//      node reachable through failure links).
// f[i][j]: minimum number of changed characters among the first i text
//          characters when the automaton is in state j.
// Case: running test-case number used in the output.
int n, m, tr[N][4], dar[N], ne[N], idx, f[N][N], Case;
// Input buffer: patterns are read 0-based, the text 1-based (into str + 1).
char str[N];

// Maps a nucleotide letter to its column index: 'A' -> 0, 'T' -> 1, 'G' -> 2.
// Every other character (including 'C') maps to 3.
int get(char c) {
    switch (c) {
        case 'A': return 0;
        case 'T': return 1;
        case 'G': return 2;
        default:  return 3;
    }
}

//trie
// Inserts the pattern held in the global buffer `str` into the 4-letter trie
// (letters mapped through get()) and flags its final node as forbidden in dar[].
void insert() {
    int node = 0;  // root
    for (const char* s = str; *s; ++s) {
        int& child = tr[node][get(*s)];
        if (child == 0) child = ++idx;
        node = child;
    }
    dar[node] = 1;
}

void build() {
    queue<int> q;
    for (int i = 0; i < 4; i++) {
        if (tr[0][i]) q.push(tr[0][i]);
    }
    while (!q.empty()) {
        int u = q.front(); q.pop();
        for (int i = 0; i < 4; i++) {
            int v = tr[u][i];
            if (!v) { tr[u][i] = tr[ne[u]][i]; continue; }
            int j = ne[u];// in KMP: j = next[i - 1]
            ne[v] = tr[j][i];
            dar[v] |= dar[ne[v]];
            q.push(v);
        }
    }
}

// Resets the automaton state (transitions, forbidden flags, failure links and
// the node counter) before a new test case is read.
inline void init() {
    idx = 0;
    for (auto& row : tr) {
        for (int& cell : row) cell = 0;
    }
    for (int& flag : dar) flag = 0;
    for (int& link : ne) link = 0;
}

inline void solve() {
    for (int i = 1; i <= n; i++) {
        cin >> str;
        insert();
    }
    build();
    cin >> str + 1;
    m = strlen(str + 1);

    memset(f, 0x3f, sizeof f);
    f[0][0] = 0;
    for (int i = 0; i < m; i++) {
        for (int j = 0; j <= idx; j++) {
            for (int k = 0; k < 4; k++) {
                int p = tr[j][k];
                if (!dar[p]) f[i + 1][p] = min(f[i + 1][p], f[i][j] + (get(str[i + 1]) != k));
            }
        }
    }
    int res = 0x3f3f3f3f;
    for (int i = 0; i <= idx; i++) res = min(res, f[m][i]);
    if (res == 0x3f3f3f3f) res = -1;
    printf("Case %d: %d\n", ++Case, res);
}

// Entry point: processes test cases until reading n fails or n is 0.
int main(){
    for (;;) {
        if (!(cin >> n) || n == 0) break;
        init();
        solve();
    }
    return 0;
}
posted @ 2022-08-31 17:28  _vv123  阅读(38)  评论(0)    收藏  举报