专题:后缀自动机


模板题

题意:给定一个字符串,在所有出现次数大于 1 的子串中,求「子串长度 × 出现次数」的最大值。
题解:设f(u)为状态u的出现次数。若u是插入字符时新建的状态(对应原串的一个前缀),它自身贡献1次;由复制得到的状态自身不贡献。此外f(u)还要加上parent树(由fa指针构成)上所有儿子状态的出现次数之和,这可以按fa反向建边后做一遍简单的树形dp求出。由于同一状态中的所有子串属于同一个endpos类、出现次数相同,只需要用该状态的最长长度len乘以出现次数更新答案即可。

//
// Created by vv123 on 2022/8/29.
//
#pragma GCC optimize("Ofast")
#pragma GCC optimize(2)
#pragma GCC optimize(3)
#pragma GCC optimize("inline")
#include <bits/stdc++.h>
using namespace std;

// Capacity of every per-state array and of the input buffer.
const int N = 2e6 + 10;

// tot  : index of the most recently allocated state.
// last : state that `extend` continues from.
// Both start at 1; `extend` treats state 1 as the root.
int tot = 1;
int last = 1;

// One automaton state.
struct Node {
    int len;     // length value; `extend` sets it to the predecessor's len + 1
    int fa;      // parent link walked by `extend`; 0 means "none"
    int ch[26];  // transitions indexed by letter; 0 means "no transition"
};
Node node[N];

char str[N];      // input word
long long f[N];   // per-state occurrence counter, summed over the fa tree by `dfs`
long long ans;    // best f[u] * node[u].len seen so far

// Appends letter `c` (0..25) to the suffix automaton.
//
// Allocates a new state, marks it with f = 1, and links it into the
// automaton by walking the fa chain starting at the previous `last`.
// When the walk stops at a transition whose target is "too long", that
// target is cloned (the clone keeps f = 0) and the affected transitions
// are redirected to the clone.
void extend(int c) {
    int walker = last;
    const int cur = ++tot;
    last = cur;

    node[cur].len = node[walker].len + 1;
    f[cur] = 1;

    // Every state on the fa chain that lacks a `c` transition gets one to cur.
    while (walker != 0 && node[walker].ch[c] == 0) {
        node[walker].ch[c] = cur;
        walker = node[walker].fa;
    }

    // Fell off the chain: the root becomes the parent.
    if (walker == 0) {
        node[cur].fa = 1;
        return;
    }

    const int target = node[walker].ch[c];
    if (node[target].len == node[walker].len + 1) {
        node[cur].fa = target;
        return;
    }

    // Split `target`: the clone copies its transitions and fa but gets the shorter len.
    const int clone = ++tot;
    node[clone] = node[target];
    node[clone].len = node[walker].len + 1;
    node[target].fa = clone;
    node[cur].fa = clone;

    while (walker != 0 && node[walker].ch[c] == target) {
        node[walker].ch[c] = clone;
        walker = node[walker].fa;
    }
}

// Children lists of the fa tree: g[p] holds every state whose fa is p.
vector<int> g[N];

// Post-order walk of the subtree rooted at `u`.
//
// For every state x in the subtree, adds the counters of its children
// into f[x]; once f[x] is final and greater than 1, offers
// f[x] * node[x].len as a candidate for `ans`.
//
// The walk uses an explicit stack instead of recursion: the fa tree can be
// a single chain as long as the input (e.g. one repeated letter), and a
// recursion that deep can overflow the call stack.
void dfs(int u) {
    // Each entry is (state, index of the next child of that state to visit).
    vector<pair<int, int>> pending;
    pending.emplace_back(u, 0);

    while (!pending.empty()) {
        const int x = pending.back().first;
        const int next_child = pending.back().second;

        if (next_child < static_cast<int>(g[x].size())) {
            // Advance the cursor before pushing: emplace_back may reallocate.
            pending.back().second = next_child + 1;
            pending.emplace_back(g[x][next_child], 0);
            continue;
        }

        // All children of x are folded in, so f[x] is final.
        pending.pop_back();
        if (f[x] > 1) ans = max(ans, f[x] * node[x].len);

        // Hand the finished counter to the parent still on the stack.
        if (!pending.empty()) f[pending.back().first] += f[x];
    }
}

signed main() {
    ios::sync_with_stdio(false); cin.tie(0);
    cin >> str;
    for (int i = 0; str[i]; i++) extend(str[i] - 'a');
    for (int i = 2; i <= tot; i++) {
        g[node[i].fa].emplace_back(i);
    }
    dfs(1);
    cout << ans << "\n";
    return 0;
}

玄武密码

题意:给定一个字符串s和若干串t,对每一个t求它在s中出现过的最长前缀。
题解:对s建立SAM,然后对每个t从根状态出发,沿转移边逐字符匹配,直到不存在对应的转移为止,此时已经匹配的字符数就是答案。

//
// Created by vv123 on 2022/8/29.
//
#include <bits/stdc++.h>
using namespace std;

// Capacity of the text buffer; the state array below holds twice as many entries.
const int N = 1e7 + 10;

int n, m;      // the two integers `main` reads first; m is the number of queries
int last = 1;  // state that `extend` continues from
int tot = 1;   // index of the most recently allocated state; state 1 is the root

char str[N];   // buffer reused for the text and for every query

// One automaton state over a 4-letter alphabet.
struct Node {
    int len;    // length value; `extend` sets it to the predecessor's len + 1
    int fa;     // parent link walked by `extend`; 0 means "none"
    int ch[4];  // transitions indexed by mp(letter); 0 means "no transition"
};
Node node[N << 1];
// Maps a direction letter to its transition index:
// 'E' -> 0, 'S' -> 1, 'W' -> 2, anything else -> 3.
inline int mp(char ch) {
    switch (ch) {
        case 'E': return 0;
        case 'S': return 1;
        case 'W': return 2;
        default:  return 3;
    }
}


// Appends symbol `c` (0..3) to the suffix automaton.
//
// Allocates a new state and links it in by walking the fa chain from the
// previous `last`; if the walk stops at a transition whose target is
// "too long", that target is cloned and the affected transitions are
// redirected to the clone.
void extend(int c) {
    int walker = last;
    const int cur = ++tot;
    last = cur;
    node[cur].len = node[walker].len + 1;

    // Every state on the fa chain that lacks a `c` transition gets one to cur.
    while (walker != 0 && node[walker].ch[c] == 0) {
        node[walker].ch[c] = cur;
        walker = node[walker].fa;
    }

    // Fell off the chain: the root becomes the parent.
    if (walker == 0) {
        node[cur].fa = 1;
        return;
    }

    const int target = node[walker].ch[c];
    if (node[target].len == node[walker].len + 1) {
        node[cur].fa = target;
        return;
    }

    // Split `target`: the clone copies its transitions and fa but gets the shorter len.
    const int clone = ++tot;
    node[clone] = node[target];
    node[clone].len = node[walker].len + 1;
    node[target].fa = clone;
    node[cur].fa = clone;

    while (walker != 0 && node[walker].ch[c] == target) {
        node[walker].ch[c] = clone;
        walker = node[walker].fa;
    }
}

int main() {
    ios::sync_with_stdio(false); cin.tie(0);
    cin >> n >> m >> str;
    for (int i = 0; str[i]; i++) extend(mp(str[i]));
    while (m--) {
        cin >> str;
        int p = 1, res = 0;
        for (int i = 0; str[i]; i++) {
            int c = mp(str[i]);
            if (node[p].ch[c]) p = node[p].ch[c], res++;
            else break;
        }
        cout << res << "\n";
    }
    return 0;
}

最长公共子串

题意:求若干字符串的最长公共子串,n<=11,len<=10000
题解:考虑SAM跳fa的过程

我们将第一个字符串建出SAM,考虑第二个字符串,依次加入字符,如果可以匹配就往后跳,否则跳fa,以表示丢弃一部分前缀,这样就可以找到一个结束位置p,得到最大的匹配长度。
注意,状态p被匹配时,p在fa链上的所有祖先状态(它们对应p的后缀)也同样被匹配,因此应当把p的匹配长度沿fa链向上传递给这些祖先,且每个状态的结果不超过它自身的len。这可以按fa反向建边得到parent树后,做一遍自底向上的树形dp来解决。
我们遍历第2~n个串,每个状态的答案取这n-1个结果的最小值,最后取所有状态的最大值,即为最长公共子串。

//
// Created by vv123 on 2022/8/29.
//
#include <bits/stdc++.h>
using namespace std;

// Capacity of every per-state array and of the input buffer.
const int N = 2e4 + 10;

int n;         // number of strings, read first by `main`
int tot = 1;   // index of the most recently allocated state; state 1 is the root
int last = 1;  // state that `extend` continues from

// One automaton state.
struct Node {
    int len;     // length value; `extend` sets it to the predecessor's len + 1
    int fa;      // parent link walked by `extend`; 0 means "none"
    int ch[26];  // transitions indexed by letter; 0 means "no transition"
};
Node node[N];

int ans[N];   // per state: minimum over all processed strings, starts at node[i].len
int now[N];   // per state: best match length for the string being processed
char str[N];  // buffer reused for every input string

// Appends letter `c` (0..25) to the suffix automaton.
//
// Allocates a new state and links it in by walking the fa chain from the
// previous `last`; if the walk stops at a transition whose target is
// "too long", that target is cloned and the affected transitions are
// redirected to the clone.
void extend(int c) {
    int walker = last;
    const int cur = ++tot;
    last = cur;
    node[cur].len = node[walker].len + 1;

    // Every state on the fa chain that lacks a `c` transition gets one to cur.
    while (walker != 0 && node[walker].ch[c] == 0) {
        node[walker].ch[c] = cur;
        walker = node[walker].fa;
    }

    // Fell off the chain: the root becomes the parent.
    if (walker == 0) {
        node[cur].fa = 1;
        return;
    }

    const int target = node[walker].ch[c];
    if (node[target].len == node[walker].len + 1) {
        node[cur].fa = target;
        return;
    }

    // Split `target`: the clone copies its transitions and fa but gets the shorter len.
    const int clone = ++tot;
    node[clone] = node[target];
    node[clone].len = node[walker].len + 1;
    node[target].fa = clone;
    node[cur].fa = clone;

    while (walker != 0 && node[walker].ch[c] == target) {
        node[walker].ch[c] = clone;
        walker = node[walker].fa;
    }
}

// Children lists of the fa tree: g[p] holds every state whose fa is p.
vector<int> g[N];

// Post-order walk of the subtree rooted at `u`: after it returns, now[u]
// is the maximum of now[] over u and all of its descendants.
void dfs(int u) {
    const vector<int>& kids = g[u];
    for (size_t i = 0; i < kids.size(); ++i) {
        const int child = kids[i];
        dfs(child);
        if (now[u] < now[child]) {
            now[u] = now[child];
        }
    }
}

int main() {
    ios::sync_with_stdio(false); cin.tie(0);
    cin >> n >> str;
    for (int i = 0; str[i]; i++) extend(str[i] - 'a');
    for (int i = 1; i <= tot; i++) ans[i] = node[i].len;
    for (int i = 2; i <= tot; i++) g[node[i].fa].push_back(i);
    for (int k = 2; k <= n; k++) {
        cin >> str;
        memset(now, 0, sizeof now);
        int p = 1, res = 0;
        for (int i = 0; str[i]; i++) {
            int c = str[i] - 'a';
            while (p > 1 && !node[p].ch[c]) p = node[p].fa, res = node[p].len;
            if (node[p].ch[c]) p = node[p].ch[c], res++;
            now[p] = max(now[p], res);
        }
        dfs(1);
        for (int i = 1; i <= tot; i++) ans[i] = min(ans[i], now[i]);
    }
    int res = 0;
    for (int i = 1; i <= tot; i++) res = max(res, ans[i]);
    cout << res << "\n";
    return 0;
}
posted @ 2022-08-29 19:12  _vv123  阅读(49)  评论(0)    收藏  举报