字符串(Updating)

ACAM

AC 自动机常用于解决多模匹配问题。

首先 \(t\) 为模式串,\(s\) 为文本串。\(Q\) 为自动机的状态集合,我们对所有模式串建立一棵字典树。记 \(fail_u\) 表示满足 \(v \in Q\) 且 \(v\) 是 \(u\) 的真后缀的最长的 \(v\)。然后 \(son_{u, c}\) 表示 \(\text{trans}(u, c)\)。询问很简单,对所有点一直跳 fail 然后求和,由于所有 fail 指针组成的是一棵根向树,所以可以 DFS 跑一下树上差分。时间复杂度 \(\mathcal{O}(|\Sigma|\sum |t_i| + |s|)\)。一些帮助理解的:ACAM 和 KMP 的匹配本质相同,每次加进来一个字符 \(c\),如果能匹配就匹配,不能匹配就到 \(fail_u\) 上尝试,只是不断跳的过程在 build 里而已。

#include <bits/stdc++.h>
using namespace std;
typedef long long ll;
typedef unsigned long long ull;
// typedef __int128 i128;
typedef pair<int, int> pii;
// N sizes the per-node automaton arrays, M sizes the input buffer.
const int N = 2e5 + 10, M = 2e6 + 10, mod = 998244353;
// Debug helper: prints one value followed by std::endl.
template<typename T>
void dbg(const T &t) { cout << t << endl; }
// Debug helper: prints every argument separated by one space, then std::endl.
template<typename Type, typename... Types>
void dbg(const Type& arg, const Types&... args) {
    cout << arg << ' ';
    dbg(args...);
}
namespace Loop1st {
// son  : trie edges; build() completes them into a full transition table
// ans  : per-node hit counter, summed over fail-tree subtrees by dfs()
// fail : failure link of every node
// idx  : idx[i] is the node at which the i-th pattern ends
// tot  : number of allocated nodes (node 0 is the root)
int n, son[N][26], ans[N], fail[N], idx[N], tot;
char s[M];
// Children lists of the fail tree (edge fail[v] -> v).
// vector<int> is used instead of basic_string<int>, which needs a
// char_traits<int> specialization the standard does not provide.
vector<int> e[N];

// Inserts the 1-indexed, NUL-terminated lowercase string s (s[0] is ignored)
// into the trie and returns the node where it ends.
int insert(char s[]) {
    int u = 0;
    for (int i = 1; s[i]; i++) {
        int c = s[i] - 'a';
        if (!son[u][c]) son[u][c] = ++tot;
        u = son[u][c];
    }
    return u;
}

// BFS over the trie: sets fail[] for every node and fills each missing
// son[u][c] with the transition of fail[u].
void build() {
    queue<int>q;
    for (int i = 0; i < 26; i++) if (son[0][i]) q.push(son[0][i]);
    while (!q.empty()) {
        int u = q.front(); q.pop();
        for (int i = 0; i < 26; i++) {
            if (son[u][i]) fail[son[u][i]] = son[fail[u]][i], q.push(son[u][i]);
            else son[u][i] = son[fail[u]][i];
        }
    }
}

// Runs the 1-indexed text s through the automaton and counts one hit on the
// state reached after every character.
void query(char s[]) {
    int u = 0;
    for (int i = 1; s[i]; i++) {
        u = son[u][s[i] - 'a'];
        ans[u]++;
    }
}

// Replaces ans[x] by the sum of ans over the subtree of x in the tree given
// by e[], for every x in the subtree of u.
// Implemented iteratively: the tree can be as deep as the number of trie
// nodes, which makes plain recursion prone to stack overflow.
void dfs(int u) {
    vector<int> order(1, u), parent(1, -1);
    // Breadth-first order: every node appears after its parent.
    for (size_t i = 0; i < order.size(); i++) {
        const int x = order[i];
        for (int v : e[x]) {
            order.push_back(v);
            parent.push_back(x);
        }
    }
    // Reverse order: a node is folded into its parent only once complete.
    for (size_t i = order.size() - 1; i > 0; i--) ans[parent[i]] += ans[order[i]];
}

// Reads n patterns and one text from stdin; prints, for each pattern, the
// accumulated counter of its end node (one value per line).
void main() {
    cin >> n;
    for (int i = 1; i <= n; i++) {
        cin >> (s + 1);
        idx[i] = insert(s);
    }
    build();
    for (int i = 1; i <= tot; i++) e[fail[i]].push_back(i);
    cin >> (s + 1);
    query(s); dfs(0);
    for (int i = 1; i <= n; i++) cout << ans[idx[i]] << '\n';
}

}
int main() {
    // Optional file redirection for local debugging:
    // freopen("data.in", "r", stdin);
    // freopen("data.out", "w", stdout);
    ios::sync_with_stdio(false);
    cin.tie(nullptr);
    cout.tie(nullptr);
    // Number of test cases; enable the read below for multi-test input.
    int tests = 1;
    // cin >> tests;
    for (; tests > 0; --tests) Loop1st::main();
    return 0;
}

例题:

板子

  1. P3966 \(fail_u\) 表示的串一定会在 \(u\) 表示的串中出现,所以答案就是建出 fail 树后求子树和。
#include <bits/stdc++.h>
using namespace std;
typedef long long ll;
typedef unsigned long long ull;
// typedef __int128 i128;
typedef pair<int, int> pii;
// M sizes both the per-node automaton arrays and the input buffer.
const int N = 2e5 + 10, M = 1e6 + 10, mod = 998244353;
// Debug helper: prints one value followed by std::endl.
template<typename T>
void dbg(const T &t) { cout << t << endl; }
// Debug helper: prints every argument separated by one space, then std::endl.
template<typename Type, typename... Types>
void dbg(const Type& arg, const Types&... args) {
    cout << arg << ' ';
    dbg(args...);
}
namespace Loop1st {
// son  : trie edges; build() completes them into a full transition table
// ans  : number of inserted words passing through each node, later summed
//        over fail-tree subtrees by dfs()
// fail : failure link of every node
// idx  : idx[i] is the node at which the i-th word ends
// tot  : number of allocated nodes (node 0 is the root)
int n, son[M][26], ans[M], fail[M], idx[M], tot;
char s[M];
// Children lists of the fail tree (edge fail[v] -> v).
// vector<int> is used instead of basic_string<int>, which needs a
// char_traits<int> specialization the standard does not provide.
vector<int> e[M];

// Inserts the 1-indexed, NUL-terminated lowercase string s (s[0] is ignored),
// counting one hit on every node along its path; returns the end node.
int insert(char s[]) {
    int u = 0;
    for (int i = 1; s[i]; i++) {
        int c = s[i] - 'a';
        if (!son[u][c]) son[u][c] = ++tot;
        u = son[u][c];
        ans[u]++;
    }
    return u;
}

// BFS over the trie: sets fail[] for every node and fills each missing
// son[u][c] with the transition of fail[u].
void build() {
    queue<int>q;
    for (int i = 0; i < 26; i++) if (son[0][i]) q.push(son[0][i]);
    while (!q.empty()) {
        int u = q.front(); q.pop();
        for (int i = 0; i < 26; i++) {
            if (son[u][i]) fail[son[u][i]] = son[fail[u]][i], q.push(son[u][i]);
            else son[u][i] = son[fail[u]][i];
        }
    }
}

// Runs the 1-indexed text s through the automaton, counting one hit per
// visited state. Not called by main() in this program.
void query(char s[]) {
    int u = 0;
    for (int i = 1; s[i]; i++) {
        u = son[u][s[i] - 'a'];
        ans[u]++;
    }
}

// Replaces ans[x] by the sum of ans over the subtree of x in the tree given
// by e[], for every x in the subtree of u.
// Implemented iteratively: the tree can be as deep as the number of trie
// nodes, which makes plain recursion prone to stack overflow.
void dfs(int u) {
    vector<int> order(1, u), parent(1, -1);
    // Breadth-first order: every node appears after its parent.
    for (size_t i = 0; i < order.size(); i++) {
        const int x = order[i];
        for (int v : e[x]) {
            order.push_back(v);
            parent.push_back(x);
        }
    }
    // Reverse order: a node is folded into its parent only once complete.
    for (size_t i = order.size() - 1; i > 0; i--) ans[parent[i]] += ans[order[i]];
}

// Reads n words from stdin; prints, for each word, the subtree sum at its
// end node (one value per line).
void main() {
    cin >> n;
    for (int i = 1; i <= n; i++) {
        cin >> (s + 1);
        idx[i] = insert(s);
    }
    build();
    for (int i = 1; i <= tot; i++) e[fail[i]].push_back(i);
    // query(s);
    dfs(0);
    for (int i = 1; i <= n; i++) cout << ans[idx[i]] << '\n';
}

}
int main() {
    // Optional file redirection for local debugging:
    // freopen("data.in", "r", stdin);
    // freopen("data.out", "w", stdout);
    ios::sync_with_stdio(false);
    cin.tie(nullptr);
    cout.tie(nullptr);
    // Number of test cases; enable the read below for multi-test input.
    int tests = 1;
    // cin >> tests;
    for (; tests > 0; --tests) Loop1st::main();
    return 0;
}
  1. P2444 相当于在 ACAM 里找一个能从根走到的环,DFS 即可。
#include <bits/stdc++.h>
using namespace std;
typedef long long ll;
typedef unsigned long long ull;
// typedef __int128 i128;
typedef pair<int, int> pii;
// M sizes the per-node automaton arrays and the input buffer.
const int N = 2e5 + 10, M = 3e4 + 10, mod = 998244353;
// Debug helper: prints one value followed by std::endl.
template<typename T>
void dbg(const T &t) { cout << t << endl; }
// Debug helper: prints every argument separated by one space, then std::endl.
template<typename Type, typename... Types>
void dbg(const Type& arg, const Types&... args) {
    cout << arg << ' ';
    dbg(args...);
}
namespace Loop1st {
// son  : trie edges over {0, 1}; build() completes the transition table
// b    : 1 if the node ends a pattern or (after build) has such a node on
//        its fail chain
// vis  : node was entered by dfs(); ins: node is on the current dfs stack
// fail : failure link; idx[i]: end node of the i-th pattern
// tot  : number of allocated nodes; ok: set to 1 once a cycle is found
// (The unused fail-tree adjacency array of the original was dropped.)
int n, son[M][2], b[M], vis[M], ins[M], fail[M], idx[M], tot, ok;
char s[M];

// Inserts the 1-indexed, NUL-terminated binary string s (s[0] is ignored),
// marks its end node as forbidden and returns that node.
int insert(char s[]) {
    int u = 0;
    for (int i = 1; s[i]; i++) {
        int c = s[i] - '0';
        if (!son[u][c]) son[u][c] = ++tot;
        u = son[u][c];
    }
    b[u] = 1;
    return u;
}

// BFS over the trie: sets fail[], completes son[][] and propagates the
// forbidden mark b[] along fail links.
void build() {
    queue<int>q;
    for (int i = 0; i < 2; i++) if (son[0][i]) q.push(son[0][i]);
    while (!q.empty()) {
        int u = q.front(); q.pop();
        // fail[u] was dequeued earlier, so b[fail[u]] is already final.
        b[u] |= b[fail[u]];
        for (int i = 0; i < 2; i++) {
            if (son[u][i]) fail[son[u][i]] = son[fail[u]][i], q.push(son[u][i]);
            else son[u][i] = son[fail[u]][i];
        }
    }
}

// Depth-first search over non-forbidden transitions; sets ok when an edge
// leads back to a node that is still on the recursion stack.
// Transitions with son[u][i] == 0 are skipped; main() only calls this when
// both root edges exist.
void dfs(int u) {
    vis[u] = ins[u] = 1;
    for (int i = 0; i < 2; i++) if (son[u][i] && !b[son[u][i]]) {
        if (ins[son[u][i]]) ok = 1;
        else if (!vis[son[u][i]]) dfs(son[u][i]);
    }
    ins[u] = 0;
}

// Reads n binary patterns from stdin and prints "TAK" if the automaton has
// a cycle reachable from the root that avoids forbidden nodes, else "NIE".
void main() {
    cin >> n;
    for (int i = 1; i <= n; i++) {
        cin >> (s + 1);
        idx[i] = insert(s);
    }
    // If no pattern starts with some digit, the answer is TAK immediately.
    if (!son[0][0] || !son[0][1]) {
        cout << "TAK\n";
        return ;
    }
    build();
    // query(s);
    dfs(0);
    cout << (ok ? "TAK\n" : "NIE\n");
}

}
int main() {
    // Optional file redirection for local debugging:
    // freopen("data.in", "r", stdin);
    // freopen("data.out", "w", stdout);
    ios::sync_with_stdio(false);
    cin.tie(nullptr);
    cout.tie(nullptr);
    // Number of test cases; enable the read below for multi-test input.
    int tests = 1;
    // cin >> tests;
    for (; tests > 0; --tests) Loop1st::main();
    return 0;
}
  1. CF1202E 首先考虑转化为枚举分界点 \(i\),令 \(f_i\) 表示 \(s_j\) 与 \(t_{[l, i]}\) 匹配的 \(j\) 的个数,\(g_i\) 表示 \(s_j\) 与 \(t_{[i, r]}\) 匹配的 \(j\) 的个数,则答案为 \(\sum f_i \times g_{i + 1}\)。\(f\) 的话,记一下其 fail 树上到根节点的作为某字符串结尾的节点的个数。\(g\) 的话考虑翻转一下 \(s, t\) 就变成了和求 \(f\) 一样的。时间复杂度 \(\mathcal{O}(n + m)\)。
#include <bits/stdc++.h>
using namespace std;
typedef long long ll;
typedef unsigned long long ull;
// typedef __int128 i128;
typedef pair<int, int> pii;
// N sizes the per-node automaton arrays and the per-position result arrays.
const int N = 2e5 + 10, mod = 998244353;
// Debug helper: prints one value followed by std::endl.
template<typename T>
void dbg(const T &t) { cout << t << endl; }
// Debug helper: prints every argument separated by one space, then std::endl.
template<typename Type, typename... Types>
void dbg(const Type& arg, const Types&... args) {
    cout << arg << ' ';
    dbg(args...);
}
namespace Loop1st {
// f[i]: value recorded by acam1 after reading t[0..i];
// g[i]: value recorded by acam2 after reading the first i + 1 characters of
//       the reversed text.
int n, f[N], g[N];
string s, t;
struct ACAM {
    // val[u]: number of inserted strings ending at u; after build() it also
    // includes the counts of every node on u's fail chain.
    int tot, son[N][26], fail[N], val[N];

    // Inserts a lowercase string. Taken by const reference to avoid copying
    // the string on every call.
    void insert(const string &s) {
        int u = 0;
        for (char c : s) {
            int &v = son[u][c - 'a'];
            if (!v) v = ++tot;
            u = v;
        }
        val[u]++;
    }

    // BFS: sets fail[], completes son[][] and accumulates val[] along fail
    // links (the fail target is shallower, so its val is already final).
    void build() {
        queue<int>q;
        for (int i = 0; i < 26; i++) if (son[0][i]) q.push(son[0][i]);
        while (!q.empty()) {
            int u = q.front(); q.pop();
            for (int i = 0; i < 26; i++) {
                int &v = son[u][i];
                if (v) {
                    fail[v] = son[fail[u]][i];
                    val[v] += val[fail[v]];
                    q.push(v);
                } else v = son[fail[u]][i];
            }
        }
    }

    // Runs s through the automaton and stores val of the state reached after
    // character i into f[i] (0-indexed). f must have room for s.size() ints.
    void query(const string &s, int *f) {
        int u = 0;
        for (int i = 0; i < (int)s.size(); i++) {
            u = son[u][s[i] - 'a'];
            f[i] = val[u];
        }
    }
} acam1, acam2;

// Reads the text t, then n strings; prints the sum over all split points of
// f (matches ending at i) times g (matches starting at i + 1).
void main() {
    cin >> t;
    cin >> n;
    for (int i = 1; i <= n; i++) {
        cin >> s;
        acam1.insert(s);
        reverse(s.begin(), s.end());
        acam2.insert(s);
    }
    acam1.build(); acam2.build();
    acam1.query(t, f);
    reverse(t.begin(), t.end());
    acam2.query(t, g);
    ll ans = 0;
    int len = (int)t.size();
    for (int i = 0; i + 1 < len; i++) {
        // Position i + 1 of t is index len - i - 2 of the reversed text.
        ans += (ll)f[i] * g[len - i - 2];
    }
    cout << ans << '\n';
}

}
int main() {
    // Optional file redirection for local debugging:
    // freopen("data.in", "r", stdin);
    // freopen("data.out", "w", stdout);
    ios::sync_with_stdio(false);
    cin.tie(nullptr);
    cout.tie(nullptr);
    // Number of test cases; enable the read below for multi-test input.
    int tests = 1;
    // cin >> tests;
    for (; tests > 0; --tests) Loop1st::main();
    return 0;
}
// start coding at 19:39
// finish debugging at 20:04

ACAM 上 DP

  1. P4052 考虑 \(dp_{i, u}\) 表示长度为 \(i\),ACAM 上走到了节点 \(u\),转移显然。
#include <bits/stdc++.h>
using namespace std;
using ll = long long;
using ull = unsigned long long;
// using i128 = __int128;
using pii = pair<int, int>;
const int N = 1e2 + 10;
const int M = 6e3 + 10;
const int mod = 1e4 + 7;
// Debug printer for a single value (ends the line with std::endl).
template<typename Head>
void dbg(const Head &head) {
    cout << head;
    cout << endl;
}
// Debug printer for several values, space separated, ending with std::endl.
template<typename Head, typename... Rest>
void dbg(const Head &head, const Rest &...rest) {
    cout << head;
    cout << ' ';
    dbg(rest...);
}
namespace Loop1st {
// x += y, followed by a single conditional subtraction of mod.
void add(int &x, int y) {
    x += y;
    if (x >= mod) x -= mod;
}
// dp[i][u]: number of length-i strings that end in automaton state u without
// ever entering a marked state; ans: sum of dp[m][*].
int n, m, dp[N][M], ans;
char s[N][N];
struct ACAM {
    // b[u] is non-zero when u ends a pattern or, after build(), when some
    // node on its fail chain does.
    int tot, son[M][26], fail[M], b[M];

    // Inserts the 1-indexed uppercase string s (s[0] is ignored).
    void insert(char s[]) {
        int cur = 0;
        const int len = strlen(s + 1);
        for (int p = 1; p <= len; p++) {
            const int c = s[p] - 'A';
            if (son[cur][c] == 0) son[cur][c] = ++tot;
            cur = son[cur][c];
        }
        b[cur] = 1;
    }

    // BFS construction of fail links and the completed transition table.
    void build() {
        queue<int> bfs;
        for (int c = 0; c < 26; c++) {
            if (son[0][c] != 0) bfs.push(son[0][c]);
        }
        while (!bfs.empty()) {
            const int cur = bfs.front();
            bfs.pop();
            for (int c = 0; c < 26; c++) {
                // Transition of the fail state; it belongs to a shallower
                // node, so it is already final.
                const int fallback = son[fail[cur]][c];
                if (son[cur][c] == 0) {
                    son[cur][c] = fallback;
                    continue;
                }
                const int child = son[cur][c];
                fail[child] = fallback;
                b[child] |= b[fallback];
                bfs.push(child);
            }
        }
    }

    // Fills dp[][] layer by layer over unmarked transitions and accumulates
    // the last layer into ans.
    void DP() {
        dp[0][0] = 1;
        for (int len = 0; len < m; len++) {
            for (int u = 0; u <= tot; u++) {
                for (int c = 0; c < 26; c++) {
                    const int to = son[u][c];
                    if (b[to]) continue;
                    add(dp[len + 1][to], dp[len][u]);
                }
            }
        }
        for (int u = 0; u <= tot; u++) add(ans, dp[m][u]);
    }
} acam;

// Reads n and m, then n patterns; prints 26^m minus the number of strings
// counted by DP(), modulo mod.
void main() {
    cin >> n >> m;
    for (int i = 1; i <= n; i++) {
        cin >> (s[i] + 1);
        acam.insert(s[i]);
    }
    acam.build();
    acam.DP();
    int total = 1;
    for (int i = 1; i <= m; i++) total = 26ll * total % mod;
    cout << (total - ans + mod) % mod << '\n';
}

}
int main() {
    // Optional file redirection for local debugging:
    // freopen("data.in", "r", stdin);
    // freopen("data.out", "w", stdout);
    ios::sync_with_stdio(false);
    cin.tie(nullptr);
    cout.tie(nullptr);
    // Number of test cases; enable the read below for multi-test input.
    int tests = 1;
    // cin >> tests;
    for (; tests > 0; --tests) Loop1st::main();
    return 0;
}
// start coding at unknown
// finish debugging at 21:22
  1. CF433E 考虑数位 DP。

  2. P3311 考虑数位 DP。

#include <bits/stdc++.h>
using namespace std;
using ll = long long;
using ull = unsigned long long;
// using i128 = __int128;
using pii = pair<int, int>;
const int N = 2e3 + 10;
const int M = 1e2 + 10;
const int mod = 1e9 + 7;
// Debug printer for a single value (ends the line with std::endl).
template<typename Head>
void dbg(const Head &head) {
    cout << head;
    cout << endl;
}
// Debug printer for several values, space separated, ending with std::endl.
template<typename Head, typename... Rest>
void dbg(const Head &head, const Rest &...rest) {
    cout << head;
    cout << ' ';
    dbg(rest...);
}
namespace Loop1st {
// n: number of digits of the bound t; m: number of patterns.
// f[d][u][lim][lz]: memo table of ACAM::dfs, -1 meaning "not computed".
int n, m, f[N][N][2][2];
char s[M][N], t[N];
struct ACAM {
    // val[u] is non-zero when u ends a pattern or, after build(), when some
    // node on its fail chain does.
    int tot, son[N][10], fail[N], val[N];

    // Inserts the 1-indexed digit string s (s[0] is ignored).
    void insert(char s[]) {
        int cur = 0;
        const int len = strlen(s + 1);
        for (int p = 1; p <= len; p++) {
            const int digit = s[p] - '0';
            if (son[cur][digit] == 0) son[cur][digit] = ++tot;
            cur = son[cur][digit];
        }
        val[cur] = 1;
    }

    // BFS construction of fail links and the completed transition table.
    void build() {
        queue<int> bfs;
        for (int c = 0; c < 10; c++) {
            if (son[0][c] != 0) bfs.push(son[0][c]);
        }
        while (!bfs.empty()) {
            const int cur = bfs.front();
            bfs.pop();
            for (int c = 0; c < 10; c++) {
                // Transition of the fail state (a shallower node, already final).
                const int fallback = son[fail[cur]][c];
                if (son[cur][c] == 0) {
                    son[cur][c] = fallback;
                    continue;
                }
                const int child = son[cur][c];
                fail[child] = fallback;
                val[child] |= val[fallback];
                bfs.push(child);
            }
        }
    }

    // Digit DP. d: current position in t (1-indexed); u: automaton state;
    // lim: the prefix chosen so far equals the prefix of t; lz: only leading
    // zeros were placed so far. Returns the count modulo mod; an all-zero
    // choice contributes 0.
    int dfs(int d, int u, bool lim, bool lz) {
        if (d == n + 1) return lz ? 0 : 1;
        int &memo = f[d][u][lim][lz];
        if (memo != -1) return memo;
        const int up = lim ? t[d] - '0' : 9;
        int res = 0;
        for (int digit = 0; digit <= up; digit++) {
            const bool nextLim = lim && digit == up;
            if (lz && digit == 0) {
                // Still inside the leading zeros: stay at the root.
                res = (res + dfs(d + 1, 0, nextLim, true)) % mod;
                continue;
            }
            const int to = son[u][digit];
            if (val[to]) continue;
            res = (res + dfs(d + 1, to, nextLim, false)) % mod;
        }
        memo = res;
        return res;
    }
} acam;

// Reads the bound t and m patterns from stdin and prints the DP result.
void main() {
    cin >> (t + 1) >> m;
    n = strlen(t + 1);
    for (int i = 1; i <= m; i++) {
        cin >> (s[i] + 1);
        acam.insert(s[i]);
    }
    acam.build();
    // Every byte 0xFF makes every int of the memo table equal to -1.
    memset(f, 255, sizeof f);
    cout << acam.dfs(1, 0, 1, 1) << '\n';
}

}
int main() {
    // Optional file redirection for local debugging:
    // freopen("data.in", "r", stdin);
    // freopen("data.out", "w", stdout);
    ios::sync_with_stdio(false);
    cin.tie(nullptr);
    cout.tie(nullptr);
    // Number of test cases; enable the read below for multi-test input.
    int tests = 1;
    // cin >> tests;
    for (; tests > 0; --tests) Loop1st::main();
    return 0;
}
// start coding at 19:41
// finish debugging at 19:59
  1. P2292 考虑暴力跳 fail 的 DP:\(f_i = \bigvee_{j < i} \left(f_j \land [\exists k,\ s_k = t_{[j + 1, i]}]\right)\),注意到 \(|s_k| \le 20\),所以可以状压所有可以转移的位置。
#include <bits/stdc++.h>
using namespace std;
using ll = long long;
using ull = unsigned long long;
// using i128 = __int128;
using pii = pair<int, int>;
const int N = 30;
const int M = 2e6 + 10;
const int mod = 998244353;
// Debug printer for a single value (ends the line with std::endl).
template<typename Head>
void dbg(const Head &head) {
    cout << head;
    cout << endl;
}
// Debug printer for several values, space separated, ending with std::endl.
template<typename Head, typename... Rest>
void dbg(const Head &head, const Rest &...rest) {
    cout << head;
    cout << ' ';
    dbg(rest...);
}
namespace Loop1st {
int n, m;
char s[N][N], t[M];
struct ACAM {
    // idx[u]: 1 if a word ends at u; dep[u]: depth of u in the trie;
    // st[u]: bitmask whose bit d is set when a word of length d ends at u or
    // at a node on u's fail chain.
    int tot, son[N * N][26], idx[N * N], fail[N * N], dep[N * N], st[N * N];

    // Inserts the 1-indexed lowercase word s (s[0] is ignored).
    void insert(char s[]) {
        int cur = 0;
        for (int p = 1; s[p] != '\0'; p++) {
            const int c = s[p] - 'a';
            if (son[cur][c] == 0) son[cur][c] = ++tot;
            cur = son[cur][c];
        }
        idx[cur] = 1;
    }

    // BFS construction of fail links, depths, length masks and the completed
    // transition table.
    void build() {
        queue<int> bfs;
        for (int c = 0; c < 26; c++) {
            const int child = son[0][c];
            if (child == 0) continue;
            bfs.push(child);
            dep[child] = 1;
        }
        while (!bfs.empty()) {
            const int cur = bfs.front();
            bfs.pop();
            // Inherit the mask of the fail state, then add this node's own length.
            st[cur] = st[fail[cur]] | (idx[cur] ? 1 << dep[cur] : 0);
            for (int c = 0; c < 26; c++) {
                const int fallback = son[fail[cur]][c];
                if (son[cur][c] == 0) {
                    son[cur][c] = fallback;
                    continue;
                }
                const int child = son[cur][c];
                fail[child] = fallback;
                dep[child] = dep[cur] + 1;
                bfs.push(child);
            }
        }
    }

    // Returns the largest i such that the prefix t[1..i] can be split into
    // inserted words (0 if none). Bit k of `reach` records whether the
    // prefix of length i - k is splittable.
    int query(char t[]) {
        int cur = 0, best = 0;
        unsigned reach = 1;
        for (int p = 1; t[p] != '\0'; p++) {
            cur = son[cur][t[p] - 'a'];
            reach <<= 1;
            if ((st[cur] & reach) != 0) {
                reach |= 1;
                best = p;
            }
        }
        return best;
    }
} acam;

// Reads n words and m texts from stdin; prints query() of each text.
void main() {
    cin >> n >> m;
    for (int i = 1; i <= n; i++) {
        cin >> (s[i] + 1);
        acam.insert(s[i]);
    }
    acam.build();
    for (; m > 0; m--) {
        cin >> (t + 1);
        cout << acam.query(t) << '\n';
    }
}

}
int main() {
    // Optional file redirection for local debugging:
    // freopen("data.in", "r", stdin);
    // freopen("data.out", "w", stdout);
    ios::sync_with_stdio(false);
    cin.tie(nullptr);
    cout.tie(nullptr);
    // Number of test cases; enable the read below for multi-test input.
    int tests = 1;
    // cin >> tests;
    for (; tests > 0; --tests) Loop1st::main();
    return 0;
}
// start coding at 17:58
// finish debugging at 18:17

搭配 DS

  1. P2414 考虑子串等价于某个前缀的后缀,前缀就是 trie 上 \(u\) 到根的路径上的点,后缀就是 fail 树上的点,于是相当于 fail 树上 \(x\) 的子树与 trie 上 \(y\) 到根节点路径的交集的大小,路径加子树查,BIT 即可。
#include <bits/stdc++.h>
using namespace std;
typedef long long ll;
typedef unsigned long long ull;
// typedef __int128 i128;
typedef pair<int, int> pii;
// N sizes every per-node, per-string and per-query array.
const int N = 2e5 + 10, mod = 998244353;
// Debug helper: prints one value followed by std::endl.
template<typename T>
void dbg(const T &t) { cout << t << endl; }
// Debug helper: prints every argument separated by one space, then std::endl.
template<typename Type, typename... Types>
void dbg(const Type& arg, const Types&... args) {
    cout << arg << ' ';
    dbg(args...);
}
namespace Loop1st {
char s[N];
// L/R : entry/exit timestamps of the fail-tree DFS; idx: timestamp counter
// stk : stack of trie nodes for the string currently being typed
// a   : a[k] is the trie node of the k-th printed string; cnt: their count
int Q, L[N], R[N], idx, stk[N], top, a[N], cnt;
int son[N][26], fail[N], tot;
int ans[N];
// qry[y] holds (x, query id) pairs to be answered when dfs2 stands on y.
vector<pii>qry[N];
// e: children in the fail tree; g: children in the original trie.
// vector<int> is used instead of basic_string<int>, which needs a
// char_traits<int> specialization the standard does not provide.
vector<int> e[N], g[N];
// Fenwick tree over DFS timestamps 1..idx: point add, range sum.
struct Fenwick {
    int tr[N];
    void add(int x, int v) { for (int i = x; i <= idx; i += i & -i) tr[i] += v; }
    int ask(int x) { int res = 0; for (int i = x; i; i -= i & -i) res += tr[i]; return res; }
    int ask(int l, int r) { return ask(r) - ask(l - 1); }
} BIT;

// Assigns [L[u], R[u]] so that the fail-tree subtree of u is one interval.
void dfs1(int u) {
    L[u] = ++idx;
    for (int v : e[u]) dfs1(v);
    R[u] = idx;
}

// Walks the original trie keeping exactly the nodes on the root..u path
// marked in the BIT, and answers the queries attached to u.
void dfs2(int u) {
    BIT.add(L[u], 1);
    for (auto [x, id] : qry[u]) {
        // Marked nodes inside the fail-tree subtree of x.
        ans[id] = BIT.ask(L[x], R[x]);
    }
    for (int v : g[u]) dfs2(v);
    BIT.add(L[u], -1);
}

// Reads the keystroke string ('B' = backspace, 'P' = print, otherwise a
// lowercase letter), builds trie and automaton, then answers Q queries
// "x y" offline; one answer per line.
void main() {
    cin >> (s + 1);
    for (int i = 1; s[i]; i++) {
        if (s[i] == 'B') {
            // Guard against a backspace on an empty buffer.
            if (top > 0) top--;
        }
        else if (s[i] == 'P') a[++cnt] = stk[top];
        else {
            int u = stk[top], &v = son[u][s[i] - 'a'];
            if (!v) v = ++tot;
            stk[++top] = v;
        }
    }
    // Record the real trie edges before build() overwrites missing ones.
    for (int i = 0; i <= tot; i++) {
        for (int j = 0; j < 26; j++) if (son[i][j]) g[i].push_back(son[i][j]);
    }
    queue<int>q;
    for (int i = 0; i < 26; i++) if (son[0][i]) q.push(son[0][i]);
    while (!q.empty()) {
        int u = q.front(); q.pop();
        e[fail[u]].push_back(u);
        for (int i = 0; i < 26; i++) {
            int &v = son[u][i];
            if (v) {
                fail[v] = son[fail[u]][i];
                q.push(v);
            } else v = son[fail[u]][i];
        }
    }
    dfs1(0);
    cin >> Q;
    for (int i = 1; i <= Q; i++) {
        int u, v; cin >> u >> v;
        qry[a[v]].push_back({a[u], i});
    }
    dfs2(0);
    for (int i = 1; i <= Q; i++) cout << ans[i] << '\n';
}

}
int main() {
    // Optional file redirection for local debugging:
    // freopen("data.in", "r", stdin);
    // freopen("data.out", "w", stdout);
    ios::sync_with_stdio(false);
    cin.tie(nullptr);
    cout.tie(nullptr);
    // Number of test cases; enable the read below for multi-test input.
    int tests = 1;
    // cin >> tests;
    for (; tests > 0; --tests) Loop1st::main();
    return 0;
}
// start coding at 21:19
// finish debugging at 21:52
  1. CF163E ACAM 是个离线算法,但是这题我们可以直接初始插入,删除就是点权置 \(0\),查询就是每个前缀 fail 树上到根节点的权值和。
#include <bits/stdc++.h>
using namespace std;
typedef long long ll;
typedef unsigned long long ull;
// typedef __int128 i128;
typedef pair<int, int> pii;
// N sizes every per-node and per-string array.
const int N = 1e6 + 10, mod = 998244353;
// Debug helper: prints one value followed by std::endl.
template<typename T>
void dbg(const T &t) { cout << t << endl; }
// Debug helper: prints every argument separated by one space, then std::endl.
template<typename Type, typename... Types>
void dbg(const Type& arg, const Types&... args) {
    cout << arg << ' ';
    dbg(args...);
}
namespace Loop1st {
// pos[i]: end node of the i-th string; b[i]: 1 while the i-th string is active
// L/R   : entry/exit timestamps of the fail-tree DFS; idx: timestamp counter
int n, Q, tot, idx, son[N][26], fail[N], pos[N], b[N], L[N], R[N];
string s[N], t;
// Children lists of the fail tree (edge fail[v] -> v).
// vector<int> is used instead of basic_string<int>, which needs a
// char_traits<int> specialization the standard does not provide.
vector<int> e[N];
// Fenwick tree over timestamps 1..idx: range add (difference form), point query.
struct Fenwick {
    int tr[N];
    void add(int x, int v) { for (int i = x; i <= idx; i += i & -i) tr[i] += v; }
    void add(int l, int r, int v) { add(l, v); add(r + 1, -v); }
    int ask(int x) { int res = 0; for (int i = x; i; i -= i & -i) res += tr[i]; return res; }
} BIT;

// Inserts the lowercase string s and records its end node in pos[id].
void insert(const string &s, int id) {
    int u = 0;
    for (char c : s) {
        int &v = son[u][c - 'a'];
        if (!v) v = ++tot;
        u = v;
    }
    pos[id] = u;
}

// BFS: sets fail[], completes son[][] and collects fail-tree children in e[].
void build() {
    queue<int>q;
    for (int i = 0; i < 26; i++) if (son[0][i]) q.push(son[0][i]);
    while (!q.empty()) {
        int u = q.front(); q.pop();
        e[fail[u]].push_back(u);
        for (int i = 0; i < 26; i++) {
            int &v = son[u][i];
            if (v) {
                fail[v] = son[fail[u]][i];
                q.push(v);
            } else v = son[fail[u]][i];
        }
    }
}

// Assigns [L[u], R[u]] so that the fail-tree subtree of u is one interval.
void dfs(int u) {
    L[u] = ++idx;
    for (int v : e[u]) dfs(v);
    R[u] = idx;
}

// Runs t through the automaton ('?' characters are skipped) and sums the BIT
// point value at every visited state.
// Returns ll: the sum is accumulated in 64 bits and is no longer narrowed
// to int on return.
ll query(const string &t) {
    int u = 0;
    ll res = 0;
    for (char c : t) {
        if (c == '?') continue;
        u = son[u][c - 'a'];
        res += BIT.ask(L[u]);
    }
    return res;
}

// Reads Q and n, then n strings (all initially active), then Q operations:
// "?text" prints query(text); "-i" / "+i" deactivate / activate string i.
void main() {
    cin >> Q >> n;
    for (int i = 1; i <= n; i++) {
        cin >> s[i];
        insert(s[i], i);
    }
    build();
    dfs(0);
    for (int i = 1; i <= n; i++) {
        // An active string contributes 1 to every node of its fail subtree.
        BIT.add(L[pos[i]], R[pos[i]], 1);
        b[i] = 1;
    }
    while (Q--) {
        cin >> t;
        char op = t[0];
        if (op == '?') cout << query(t) << '\n';
        else {
            int x = 0;
            for (int i = 1; i < (int)t.size(); i++) x = x * 10 + t[i] - '0';
            // Repeated removals / insertions of the same string are ignored.
            if (op == '-' && b[x]) {
                b[x] = 0;
                x = pos[x];
                BIT.add(L[x], R[x], -1);
            }
            if (op == '+' && !b[x]) {
                b[x] = 1;
                x = pos[x];
                BIT.add(L[x], R[x], 1);
            }
        }
    }
}

}
int main() {
    // Optional file redirection for local debugging:
    // freopen("data.in", "r", stdin);
    // freopen("data.out", "w", stdout);
    ios::sync_with_stdio(false);
    cin.tie(nullptr);
    cout.tie(nullptr);
    // Number of test cases; enable the read below for multi-test input.
    int tests = 1;
    // cin >> tests;
    for (; tests > 0; --tests) Loop1st::main();
    return 0;
}
// start coding at 10:56
// finish debugging at 11:27
  1. P5840 对 S 建立 ACAM,根据 tricks 里第 41 条的结论,相当于链加单点查,BIT 即可。
#include <bits/stdc++.h>
using namespace std;
typedef long long ll;
typedef unsigned long long ull;
// typedef __int128 i128;
typedef pair<int, int> pii;
// N sizes every per-node and per-string array.
const int N = 2e6 + 10, mod = 998244353;
// Debug helper: prints one value followed by std::endl.
template<typename T>
void dbg(const T &t) { cout << t << endl; }
// Debug helper: prints every argument separated by one space, then std::endl.
template<typename Type, typename... Types>
void dbg(const Type& arg, const Types&... args) {
    cout << arg << ' ';
    dbg(args...);
}
namespace Loop1st {
int n, Q;
// pos[i]: end node of the i-th inserted string.
int tot, son[N][26], fail[N], pos[N];
// Heavy-path decomposition of the fail tree: L/R timestamps, parent fa,
// chain head top, heavy child hson, subtree size sz, depth dep.
int idx, L[N], R[N], fa[N], top[N], hson[N], sz[N], dep[N];
string s[N], t;
// Scratch list of automaton states visited by the current update().
vector<int>a;
// Children lists of the fail tree (edge fail[v] -> v).
// vector<int> is used instead of basic_string<int>, which needs a
// char_traits<int> specialization the standard does not provide.
vector<int> e[N];
// Fenwick tree over timestamps 1..idx: point add, range sum.
struct Fenwick {
    int tr[N];
    void add(int x, int v) { for (int i = x; i <= idx; i += i & -i) tr[i] += v; }
    int ask(int x) { int res = 0; for (int i = x; i; i -= i & -i) res += tr[i]; return res; }
    int ask(int l, int r) { return ask(r) - ask(l - 1); }
} BIT;

// Inserts the lowercase string s and records its end node in pos[id].
void insert(const string &s, int id) {
    int u = 0;
    for (char c : s) {
        int &v = son[u][c - 'a'];
        if (!v) v = ++tot;
        u = v;
    }
    pos[id] = u;
}

// BFS: sets fail[] (mirrored into fa[]), completes son[][] and collects the
// fail-tree children in e[].
void build() {
    queue<int>q;
    for (int i = 0; i < 26; i++) if (son[0][i]) q.push(son[0][i]);
    while (!q.empty()) {
        int u = q.front(); q.pop();
        e[fail[u]].push_back(u); fa[u] = fail[u];
        for (int i = 0; i < 26; i++) {
            int &v = son[u][i];
            if (v) {
                fail[v] = son[fail[u]][i];
                q.push(v);
            } else v = son[fail[u]][i];
        }
    }
}

// First HLD pass: depths, subtree sizes and heavy children.
void dfs1(int u) {
    sz[u] = 1;
    for (int v : e[u]) {
        dep[v] = dep[u] + 1;
        dfs1(v);
        sz[u] += sz[v];
        // hson == 0 means "none yet"; the root is never a child.
        if (!hson[u] || sz[v] > sz[hson[u]]) hson[u] = v;
    }
}

// Second HLD pass: timestamps (heavy child first) and chain heads.
void dfs2(int u, int tp) {
    L[u] = ++idx;
    top[u] = tp;
    if (hson[u]) dfs2(hson[u], tp);
    for (int v : e[u]) if (v != hson[u]) dfs2(v, v);
    R[u] = idx;
}

// Lowest common ancestor in the fail tree via chain jumping.
int lca(int u, int v) {
    int x = top[u], y = top[v];
    while (x != y) {
        // Always lift the endpoint whose chain head is deeper.
        if (dep[x] < dep[y]) swap(u, v), swap(x, y);
        u = fa[x];
        x = top[u];
    }
    return dep[u] < dep[v] ? u : v;
}

// Registers one text: adds +1 at every visited state and -1 at the LCA of
// timestamp-adjacent visited states, so each node on the union of their
// root paths ends up with a subtree sum increased by exactly one.
void update(const string &t) {
    a.clear();
    int u = 0;
    for (char c : t) {
        u = son[u][c - 'a'];
        BIT.add(L[u], 1);
        a.push_back(u);
    }
    sort(a.begin(), a.end(), [&](const int &x, const int &y) { return L[x] < L[y]; });
    for (int i = 0; i < (int)a.size() - 1; i++) {
        BIT.add(L[lca(a[i], a[i + 1])], -1);
    }
}

// Reads n strings, builds the automaton, then processes Q operations:
// "1 text" registers a text, "2 x" prints the subtree sum at pos[x].
void main() {
    cin >> n;
    for (int i = 1; i <= n; i++) {
        cin >> s[i];
        insert(s[i], i);
    }
    build();
    dfs1(0);
    dfs2(0, 0);
    cin >> Q;
    while (Q--) {
        int op, x; cin >> op;
        if (op == 1) {
            cin >> t;
            update(t);
        } else {
            cin >> x;
            x = pos[x];
            cout << BIT.ask(L[x], R[x]) << '\n';
        }
    }
}

}
int main() {
    // Optional file redirection for local debugging:
    // freopen("data.in", "r", stdin);
    // freopen("data.out", "w", stdout);
    ios::sync_with_stdio(false);
    cin.tie(nullptr);
    cout.tie(nullptr);
    // Number of test cases; enable the read below for multi-test input.
    int tests = 1;
    // cin >> tests;
    for (; tests > 0; --tests) Loop1st::main();
    return 0;
}
// start coding at 11:50
// finish debugging at 12:04
  1. P2336 和上一题类似。但是太大芬了,于是给出另一种解法

根号平衡

  1. CF587F 考虑对 \(|s_k|\) 根号分治,然后就是套路了。代码不想写。
  2. P7582 根号分治之后就是套路题了,我不想写。

SA

后缀数组。下面可以用 \(i\) 表示后缀 \(s[i\dots n]\)。

要记两个东西:\(sa_i\) 和 \(rk_i\)。\(sa_i\) 表示后缀排序后第 \(i\) 小的是哪个后缀,\(rk_i\) 表示第 \(i\) 个后缀是第几小的。显然 \(sa[rk[i]]=rk[sa[i]]=i\)。

首先考虑 \(\mathcal{O}(n \log^2 n)\) 做法,我们记所有长度为 \(w\) 的子串排序后,第 \(i\) 小的子串的起点是 \(sa_w[i]\),\(s[i\dots i + w - 1]\) 的排名为 \(rk_w[i]\),默认 \(s\) 超过 \(n\) 的部分是空字符。我们让 \(w\) 从 \(1\) 开始,以 \(rk_w[i]\) 为第一关键字,\(rk_w[i+w]\) 为第二关键字排序,这样就可以得到 \(sa_{2w}\) 和 \(rk_{2w}\),当 \(w \ge n\) 时就得到了所有后缀的 \(sa\) 和 \(rk\)。这被称为倍增法。

然后考虑优化,发现两个关键字都很小,直接基数排序,先对第二关键字计数排序,再对第一关键字计数排序,时间复杂度 \(\mathcal{O}(n \log n)\),但是常数还是较大,有如下优化:

  1. 排第二关键字的时候不用计数排序,只要将所有 \(i + w > n\) 的 \(i\) 放在最前面,剩下的位置考虑从小到大枚举 \(i\),若 \(sa_w[i] > w\),则 \(rk_w[sa_w[i]]\) 可以作为 \(sa_w[i] - w\) 的第二关键字,于是按顺序加入这样的 \(sa_w[i]-w\)。
  2. 若一次排序后所有 \(rk\) 已经不同,就已经求出了最后的 \(sa\),直接结束即可。
  3. 每次计数排序的值域可以缩小至上次排序后的 \(rk\) 的最大值。
#include<bits/stdc++.h>
using namespace std;
typedef long long ll;
typedef unsigned long long ull;
typedef pair<int, int> pii;
// N sizes the input buffer and every per-suffix array.
const int N = 1e6 + 10;
// Debug helper: prints one value followed by a newline.
template<typename T> void dbg(const T &x) { cout << x << '\n'; }
// Debug helper: prints every argument separated by one space, then a newline.
template<typename Type, typename ...Types> void dbg(const Type &arg, const Types &...args) { cout << arg << ' '; dbg(args...); }
namespace Loop1st {
// sa    : sa[i] is the start of the i-th smallest suffix (1-indexed)
// id    : suffix starts ordered by the second key of the current round
// rk    : current rank of each start; oldrk: ranks of the previous round
//         (both are double-sized because they are read at index i + w)
// cnt   : counting-sort buckets; m: current largest key value
int n, m, sa[N], id[N], rk[N << 1], oldrk[N << 1], cnt[N];
char s[N];
// Reads a string from stdin, builds its suffix array by prefix doubling with
// counting sort and prints sa[1..n] separated by spaces.
void main() {
    cin >> (s + 1);
    n = strlen(s + 1);
    // Round 0: counting sort by the first character, using raw character
    // codes as ranks (keys are assumed to lie in 1..127 — TODO confirm input).
    for (int i = 1; i <= n; i++) cnt[rk[i] = s[i]]++;
    m = 127;
    for (int i = 1; i <= m; i++) cnt[i] += cnt[i - 1];
    for (int i = n; i; i--) sa[cnt[rk[i]]--] = i;
    int p = 0;
    // Each round sorts by (rk[i], rk[i + w]); afterwards the key range
    // shrinks to p, the number of distinct ranks just produced.
    for (int w = 1; w < n; w <<= 1, m = p) {
        int cur = 0;
        // Second-key order: starts with i + w > n come first ...
        for (int i = n - w + 1; i <= n; i++) id[++cur] = i;
        // ... followed by sa[i] - w, taken in the current sa order.
        for (int i = 1; i <= n; i++) if (sa[i] > w) id[++cur] = sa[i] - w;
        // Clears buckets 0..m ((m + 1) << 2 is the byte count for 4-byte ints).
        memset(cnt, 0, (m + 1) << 2);
        for (int i = 1; i <= n; i++) cnt[rk[id[i]]]++;
        for (int i = 1; i <= m; i++) cnt[i] += cnt[i - 1];
        // Filling from the back keeps the second-key order within each bucket.
        for (int i = n; i; i--) sa[cnt[rk[id[i]]]--] = id[i];
        p = 0;
        memcpy(oldrk, rk, (n + 1) << 2);
        // Re-rank: neighbours in sa with identical key pairs share a rank.
        for (int i = 1; i <= n; i++) {
            if (oldrk[sa[i]] == oldrk[sa[i - 1]] && oldrk[sa[i] + w] == oldrk[sa[i - 1] + w]) rk[sa[i]] = p;
            else rk[sa[i]] = ++p;
        } 
        // All ranks are distinct: the order is final.
        if (p == n) break;
    }
    for (int i = 1; i <= n; i++) cout << sa[i] << " \n"[i == n];
}

}

int main() {
    // Optional file redirection for local debugging:
    // freopen("data.in", "r", stdin);
    // freopen("data.out", "w", stdout);
    ios::sync_with_stdio(false);
    cin.tie(nullptr);
    const clock_t begin = clock();
    // Number of test cases; enable the read below for multi-test input.
    const int Test = 1;
    // cin >> Test;
    int tc = 1;
    while (tc <= Test) {
        Loop1st::main();
        ++tc;
    }
    const clock_t finish = clock();
    // Report the elapsed CPU time on stderr with six decimals.
    cerr << "Time = " << fixed << setprecision(6) << 1. * (finish - begin) / CLOCKS_PER_SEC << '\n';
    return 0;
}

另外,SA 还可以用于求 height 数组,定义 \(height[i]=\text{LCP}(sa[i], sa[i - 1]), height[1] = 0\),即两个相邻后缀的 \(\text{LCP}\)。我们有引理:

\[height[rk[i]] \ge height[rk[i - 1]] - 1 \]

证明:\(RHS \le 0\) 时显然成立,当 \(height[rk[i - 1]] > 1\) 时,\(\text{LCP}(sa[rk[i - 1]], sa[rk[i - 1] - 1]) > 1\),即 \(\text{LCP}(i - 1, sa[rk[i-1]-1]) > 1\),我们设这两个后缀的 \(\text{LCP}\) 为 \(aA\),其中 \(a\) 是一个字符,\(A\) 是一个字符串,那么 \(i - 1\) 可以表示为 \(aAD\),\(sa[rk[i-1]-1]\) 可以表示为 \(aAB\),根据 \(sa\) 的性质有 \(B < D\)。而 \(i\) 可以表示为 \(AD\),而 \(sa[rk[i]-1]\) 的 \(rk\) 只比 \(i\) 小 \(1\),且 \(AB < AD\),即 \(AB\) 的 \(rk\) 比 \(i\) 至少小 \(1\),于是有 \(AB \le sa[rk[i]-1] < AD\),所以 \(sa[rk[i]-1]\) 也以 \(A\) 为前缀,即 \(height[rk[i]]=\text{LCP}(i, sa[rk[i]-1]) \ge |A|=height[rk[i-1]]-1\)。

\[height[rk[i]] \ge height[rk[i-1]]-1 \]

于是可以 \(\mathcal{O}(n)\) 计算。

	// Fills height[] by visiting suffixes in text order. k is the match
	// length carried over from the previous suffix; it drops by at most one
	// per step (height[rk[i]] >= height[rk[i - 1]] - 1) and is then extended
	// by direct character comparison.
	// NOTE(review): for rk[i] == 1 this reads sa[0]; presumably sa[0] == 0 and
	// s[0] / s[n + 1] are NUL sentinels that stop the comparison — confirm.
	for (int i = 1, k = 0; i <= n; i++) {
        if (k) k--;
        while (s[i + k] == s[sa[rk[i] - 1] + k]) k++;
        height[rk[i]] = k;
    }
```cpp
## 应用
### [JSOI2007] 字符加密
将 $S$ 复制一遍变为 $SS$ 之后后缀排序即可。
### [USACO07DEC] Best Cow Line G
每次相当于比 $L$ 开头的后缀和 $R$ 结尾的前缀,重复不用管因为不重复部分如果比不出来,那随便取哪个,所以将反串接到结尾,中间加个特殊字符跑 SA,然后判断 $rk$ 即可。
### 【模版】AC 自动机
子串一定是前缀的后缀,在 $sa$ 数组上二分并比较即可,比 ACAM 多一个 $\log |T|$ 的复杂度。
### 求两子串 LCP
LCP Lemma:
对于 $1 \le i < j < k \le n$,我们有 $\text{LCP}(sa[i], sa[k]) = \min(\text{LCP}(sa[i], sa[j]), \text{LCP}(sa[j], sa[k]))$。证明显然,详情可见 [许智磊--后缀数组](https://github.com/OI-wiki/libs/blob/master/%E9%9B%86%E8%AE%AD%E9%98%9F%E5%8E%86%E5%B9%B4%E8%AE%BA%E6%96%87/%E5%9B%BD%E5%AE%B6%E9%9B%86%E8%AE%AD%E9%98%9F2004%E8%AE%BA%E6%96%87%E9%9B%86/%E8%AE%B8%E6%99%BA%E7%A3%8A--%E5%90%8E%E7%BC%80%E6%95%B0%E7%BB%84.pdf)。
LCP Theorem:
$\text{LCP}(sa[i], sa[j]) = \min\limits_{k=i+1}^j height[k]$。 证明由 LCP Lemma 直接得出。
于是可以变为 RMQ。

### 比较两子串大小关系
对于两子串 $A = s[a\dots b], B = s[c\dots d]$,若 $\text{LCP}(a, c) \ge \min(|A|, |B|)$ 则 $A < B \Leftrightarrow |A| < |B|$。否则,$A < B \Leftrightarrow rk[a] < rk[c]$。
### 本质不同子串数目
即后缀的前缀数量减去重复。

考虑按后缀排序的顺序枚举后缀 $sa[i]$,那么每次新增的子串要除去与上一个串的 $\text{LCP}$ 即 $height[i]$。这个后缀剩下的前缀一定是新增的,如果剩下的前缀中有一个在 $sa[j]$ 中出现过,那么 $\text{LCP}(sa[i], sa[j]) > height[i]$,然而 $\text{LCP}(sa[i], sa[j]) = \min(height[k]) \le height[i]$,矛盾。

所以最终答案就是 $\dfrac{n(n+1)}{2}-\sum\limits_{i=1}^n height[i]$。
### [USACO06DEC] Milk Patterns G
考虑后缀排序,出现的子串一定是连续的 $k$ 个后缀的前缀,相当于找连续 $k - 1$ 个 $height$ 的最小值的最大值,单调队列或 `std::multiset` 都是可行的。
### [ABC141E] Who Says a Pun?
二分答案 $x$,然后将 $height$ 数组分为若干 $\ge x$ 的极长段,然后将每段的 $sa_i$ 取 $\min, \max$,减一减看看是否可行即可。

鸽了。
posted @ 2026-04-03 22:52  循环一号  阅读(3)  评论(0)    收藏  举报