AC自动机

数据结构维护fail树

string

 

AC自动机是离线型数据结构, 所以我们先离线下来所有操作, 先建出来 AC 自动机

再对 fail 树求 dfs 序用树状数组维护, 对 fail 树的子树区间加

为什么是对于子树加呢? 对于 fail 树的构建我们是由 fail [ i ] $\to$  i 

那么也就是说, 节点 i 的子树中每个点所代表的字符串都以 i 点所代表的字符串为后缀, 且长度大于等于 len[ i ]

假如 i 点是字典串结束节点, 那么他子树的点都是蕴含这个串的, 那么子树的点含有的字典串需要加一

对于答案统计我们单点查询每个点被他上面的结束节点影响即可

#include <bits/stdc++.h>
using namespace std;
#define endl "\n"
typedef long long ll;

const int N = 3e6 + 100;

// Fenwick tree (binary indexed tree) over the 1-based positions 1..n.
// It can be used either as "point add / prefix sum" (add(x, v) + sum(x)) or,
// via the difference trick, as "range add / point query"
// (add(l, r, v) + sum(x)). The two modes must not be mixed on one instance.
template <typename T>
struct BIT {
    T tr[N];  // tr[i] aggregates the lowbit(i) positions that end at i
    int n;    // number of active positions; must stay below N

    // Activates positions 1..n_ and zeroes them.
    void init(int n_) {
        n = n_;
        for(int i = 0; i <= n; i++) tr[i] = 0;
    }
    // Lowest set bit of x (0 when x == 0).
    int lowbit(int x) {return x & -x;}
    // Adds v at position x. Positions below 1 are ignored: lowbit(0) is 0, so
    // walking upward from index 0 would never terminate.
    void add(int x, T v){
        if (x < 1) return;
        for(int i = x; i <= n; i += lowbit(i)) tr[i] += v;
    }
    // Sum of positions 1..x. x is clamped to n so that cells outside the
    // active range (never written by add, possibly stale) are not read.
    T sum(int x) {
        if (x > n) x = n;
        T res = 0;
        for(int i = x; i > 0; i -= lowbit(i)) res += tr[i];
        return res;
    }
    // Sum of positions l..r (point-add mode).
    T sum(int l, int r) {return sum(r) - sum(l - 1);}
    // Adds v to every position in l..r (range-add mode); afterwards sum(x)
    // yields the value at x. A left end below 1 is clamped to 1, and the
    // closing update at r + 1 is a no-op when r + 1 exceeds n.
    void add(int l, int r, T v) {
        if (l < 1) l = 1;
        add(l, v);
        add(r + 1, -v);
    }
};

// Aho-Corasick automaton over the letters 'a'..'z'. build() completes the trie
// into a trie graph and stores the fail tree in g; dfs() then numbers the fail
// tree so that the fail-subtree of x is the contiguous range [L[x], R[x]],
// which the Fenwick tree T updates and queries.
struct Aho_Corasick_Automaton {
    // basic
    int nxt[N][26], fail[N];  // transitions and fail links
    int root, tot, dfn;       // root id, last allocated node id, DFS timestamp counter
    // special
    int L[N], R[N];           // DFS entry time of x / largest entry time in x's fail-subtree
    BIT<int> T;

    // g stores the fail tree; ref maps a dictionary string's number to its last node
    vector<int> g[N];
    map<int, int> ref;

    // Resets the automaton to a lone root. Other nodes are re-initialised
    // lazily by newnode() when they are allocated again.
    void clear() {
        memset(nxt[0], 0, sizeof nxt[0]);
        root = tot = dfn = 0;
        ref.clear(); g[0].clear(); T.init(N - 100);
    }
    // Allocates a node with no outgoing edges, fail link 0 and no fail-tree children.
    int newnode() {
        tot++;
        memset(nxt[tot], 0, sizeof nxt[tot]);
        fail[tot] = 0;
        g[tot].clear();
        return tot;
    }
    // Returns the child of `now` on letter index `id`, creating it if missing.
    int child(int now, int id) {
        if (!nxt[now][id]) nxt[now][id] = newnode();
        return nxt[now][id];
    }
    // Inserts the NUL-terminated word s and records its last node under key num.
    void insert(const char *s, int num) {
        int now = root;
        // The pointer has to advance on every step; without it the loop would
        // spin on the first character forever.
        for (; *s; ++s) now = child(now, *s - 'a');
        ref[num] = now;
    }
    // Inserts str and records its last node under key num.
    void insert(const string &str, int num) {
        int now = root;
        for (char c : str) now = child(now, c - 'a');
        ref[num] = now;
    }
    // Computes fail links in BFS order, fills missing transitions (trie graph)
    // and finally builds the fail tree. Call once, after all insertions.
    void build() {
        queue<int> q;
        for (int i = 0; i < 26; i++) {
            if (nxt[root][i]) q.push(nxt[root][i]);
        }
        while (!q.empty()) {
            int head = q.front(); q.pop();
            for (int i = 0; i < 26; i++) {
                // trie graph: a missing edge is redirected to the fail node's edge
                int tmp = nxt[head][i];
                if(!tmp) nxt[head][i] = nxt[fail[head]][i];
                else{
                    fail[tmp] = nxt[fail[head]][i];
                    q.push(tmp);
                }
            }
        }
        // build the fail tree: fail[i] is the parent of i
        for (int i = 1; i <= tot; i++) g[fail[i]].push_back(i);
    }
    // Walks s through the trie graph and adds up the point value stored in T
    // at the DFS position of every visited node.
    int serch(const string &s) {
        int now = root, ans = 0;
        for (char c : s) {
            now = nxt[now][c - 'a'];
            ans += T.sum(L[now]);
        }
        return ans;
    }
    // Assigns L/R over the fail-subtree of x in preorder. Done with an explicit
    // stack: the fail tree can be a chain as long as the number of nodes, which
    // is too deep for plain recursion.
    void dfs(int x) {
        vector<pair<int, int> > st;  // (node, index of its next unvisited child)
        L[x] = ++dfn;
        st.push_back(make_pair(x, 0));
        while (!st.empty()) {
            const int u = st.back().first;
            const int k = st.back().second;
            if (k < static_cast<int>(g[u].size())) {
                st.back().second = k + 1;
                const int v = g[u][k];
                L[v] = ++dfn;
                st.push_back(make_pair(v, 0));
            } else {
                R[u] = dfn;
                st.pop_back();
            }
        }
    }
} ACAM;

// Offline buffers: s[i] is the i-th string read and op[i] its operation type
// (1 = dictionary insertion, anything else = query).
string s[N];
int op[N];

// Handles one test case. All strings are read first so that the automaton can
// be built once; the operations are then replayed in input order.
void solve() {
    int n, m;
    cin >> n >> m;
    ACAM.clear();

    const int total = n + m;
    for (int idx = 1; idx <= total; ++idx) {
        if (idx <= n) {
            // the first n strings are always insertions
            cin >> s[idx];
            op[idx] = 1;
        } else {
            cin >> op[idx] >> s[idx];
        }
        if (op[idx] == 1) ACAM.insert(s[idx], idx);
    }

    ACAM.build();
    ACAM.dfs(0);

    for (int idx = 1; idx <= total; ++idx) {
        if (op[idx] != 1) {
            cout << ACAM.serch(s[idx]) << endl;
            continue;
        }
        // activate the word: +1 on the whole fail-subtree of its last node
        const int node = ACAM.ref[idx];
        ACAM.T.add(ACAM.L[node], ACAM.R[node], 1);
    }
}

// Entry point: unties the iostreams, reads the number of test cases and runs
// solve() once per case.
int main() {
    ios::sync_with_stdio(false);
    cin.tie(0);
    cout.tie(0);

    int T = 1;
    cin >> T;
    for (; T != 0; --T) solve();

    return 0;
}
View Code

 H-Mike and Friends

AC自动机 fail 树上 dfs 序建可持久化线段树

#include <bits/stdc++.h>
using namespace std;
#define endl "\n"
typedef long long ll;

// Size bound shared by this program's fixed arrays: automaton nodes, the input
// strings, the version roots and (times 30) the segment tree node pool.
const int N = 4e5 + 100;

// Persistent segment tree storing sums over a fixed position range. Each
// modify() copies the nodes on one root-to-leaf path and returns the id of the
// new version's root; older versions stay readable.
struct Persistent_Segments_Tree {
    int root[N], cnt;  // root ids per version / id of the last allocated node

    // left child  -> ls(id)
    // right child -> rs(id)

    struct node {
        // sum is the maintained aggregate; l and r are child node ids
        int sum, l, r;
        node() : sum(0), l(0), r(0) {}
    } Seg[N * 30];

    #define ls(x) (Seg[x].l)
    #define rs(x) (Seg[x].r)

    // Recomputes the sum of node id from its two children.
    void pushup(int id) {
        const node &left = Seg[Seg[id].l];
        const node &right = Seg[Seg[id].r];
        Seg[id].sum = left.sum + right.sum;
    }

    // Builds an all-zero tree over [l, r] and returns its root id.
    int build(int l, int r) {
        const int id = ++cnt;
        Seg[id].sum = 0;
        if (l != r) {
            const int mid = (l + r) >> 1;
            Seg[id].l = build(l, mid);
            Seg[id].r = build(mid + 1, r);
        }
        return id;
    }

    // Returns the root of a new version equal to version `now` with v added at pos.
    int modify(int now, int l, int r, int pos, int v) {
        const int id = ++cnt;
        Seg[id] = Seg[now];
        if (l == r) {
            Seg[id].sum += v;
            return id;
        }
        const int mid = (l + r) >> 1;
        if (pos > mid) {
            Seg[id].r = modify(Seg[id].r, mid + 1, r, pos, v);
        } else {
            Seg[id].l = modify(Seg[id].l, l, mid, pos, v);
        }
        pushup(id);
        return id;
    }

    // Sum over positions [x, y] in the version rooted at id, which covers [l, r].
    int query(int id, int l, int r, int x, int y) {
        if (x <= l && r <= y) return Seg[id].sum;
        const int mid = (l + r) >> 1;
        int total = 0;
        if (x <= mid) total += query(Seg[id].l, l, mid, x, y);
        if (mid < y) total += query(Seg[id].r, mid + 1, r, x, y);
        return total;
    }
} Seg;

// Aho-Corasick automaton over the letters 'a'..'z'. build() completes the trie
// into a trie graph and stores the fail tree in g; dfs() numbers the fail tree
// so that the fail-subtree of x is the contiguous range [L[x], R[x]].
struct Aho_Corasick_Automaton {
    // basic
    int nxt[N][26], fail[N];  // transitions and fail links
    int root, tot, dfn;       // root id, last allocated node id, DFS timestamp counter
    // special
    int cnt[N], L[N], R[N];   // cnt is only reset here; L/R are the DFS range of each node

    // g stores the fail tree; ref maps a dictionary string's number to its last node
    vector<int> g[N];
    map<int, int> ref;

    // Resets the automaton to a lone root. Other nodes are re-initialised
    // lazily by newnode() when they are allocated again.
    void clear() {
        memset(nxt[0], 0, sizeof nxt[0]);
        root = tot = dfn = 0;
        ref.clear(); g[0].clear();
    }
    // Allocates a node with no outgoing edges, zero counters and no fail-tree children.
    int newnode() {
        tot++;
        memset(nxt[tot], 0, sizeof nxt[tot]);
        cnt[tot] = fail[tot] = 0;
        g[tot].clear();
        return tot;
    }
    // Inserts str and records its last node under key num.
    void insert(const string &str, int num) {
        int now = root;
        for (char c : str) {
            const int id = c - 'a';
            if (!nxt[now][id]) nxt[now][id] = newnode();
            now = nxt[now][id];
        }
        ref[num] = now;
    }
    // Computes fail links in BFS order, fills missing transitions (trie graph)
    // and finally builds the fail tree. Call once, after all insertions.
    void build() {
        queue<int> q;
        for (int i = 0; i < 26; i++) {
            if (nxt[root][i]) q.push(nxt[root][i]);
        }
        while (!q.empty()) {
            int head = q.front(); q.pop();
            for (int i = 0; i < 26; i++) {
                // trie graph: a missing edge is redirected to the fail node's edge
                int tmp = nxt[head][i];
                if(!tmp) nxt[head][i] = nxt[fail[head]][i];
                else{
                    fail[tmp] = nxt[fail[head]][i];
                    q.push(tmp);
                }
            }
        }
        // build the fail tree: fail[i] is the parent of i
        for (int i = 1; i <= tot; i++) g[fail[i]].push_back(i);
    }
    // Assigns L/R over the fail-subtree of x in preorder, skipping a child
    // equal to fa. Done with an explicit stack: the fail tree can be a chain
    // as long as the number of nodes, which is too deep for plain recursion.
    void dfs(int x, int fa) {
        struct Frame { int node, parent, next; };  // next = index of the next child to visit
        vector<Frame> st;
        L[x] = ++dfn;
        st.push_back({x, fa, 0});
        while (!st.empty()) {
            Frame &top = st.back();
            if (top.next < static_cast<int>(g[top.node].size())) {
                const int y = g[top.node][top.next++];
                if (y == top.parent) continue;
                const int cur = top.node;  // copy: push_back may reallocate and invalidate `top`
                L[y] = ++dfn;
                st.push_back({y, cur, 0});
            } else {
                R[top.node] = dfn;
                st.pop_back();
            }
        }
    }
} ACAM;

// s[i] is the i-th dictionary string (1-based).
string s[N];

// Reads the dictionary, builds one segment tree version per string prefix of
// the dictionary (version i = strings 1..i) and answers the queries.
void solve() {
    int n, q;
    cin >> n >> q;
    for (int i = 1; i <= n; i++) {
        cin >> s[i];
        ACAM.insert(s[i], i);
    }
    ACAM.build();
    ACAM.dfs(0, 0);

    auto &root = Seg.root;
    auto &L = ACAM.L;
    auto &R = ACAM.R;
    auto &sz = ACAM.dfn;
    root[0] = Seg.build(1, sz);

    for (int i = 1; i <= n; i++) {
        // Every prefix of s[i] adds one point at its node's DFS position. The
        // first update branches off version i - 1, later ones stack on version i.
        int base = root[i - 1];
        int now = 0;
        for (char ch : s[i]) {
            now = ACAM.nxt[now][ch - 'a'];
            root[i] = Seg.modify(base, 1, sz, L[now], 1);
            base = root[i];
        }
    }

    for (; q != 0; --q) {
        int l, r, k;
        cin >> l >> r >> k;
        const int id = ACAM.ref[k];
        // points inside the fail-subtree of s[k]'s node, versions l..r
        const int upto_r = Seg.query(root[r], 1, sz, L[id], R[id]);
        const int before_l = Seg.query(root[l - 1], 1, sz, L[id], R[id]);
        cout << upto_r - before_l << endl;
    }
}   

// Entry point: unties the iostreams and runs the single test case.
signed main() {
    ios::sync_with_stdio(false);
    cin.tie(0);
    cout.tie(0);

    solve();

    return 0;
}
View Code

The 14th Jilin Provincial Collegiate Programming Contest

D题


 

 dp

P7456 [CERC2018] The ABCD Murderer

首先 dp 是不难看出来的, 对于文本串的每个位置 i, 我们肯定是希望能转移到它的状态越多越好, 所以我们希望文本串前缀 [ 1, i ] 的后缀能匹配到的字典串越长越好

且一旦匹配必须是匹配一整段的字典串, 我们对字典串跑个 AC自动机, 再去 fail 树上 dfs 求出每个节点能匹配的最大长度字典串长度是多少

那么我们去 trie树上跳文本串时 跳到第 x 个点时就可以通过当前节点能匹配到的最大字典串长度进行状态转移, 拿个线段树去优化一下转移即可通过本题

View Code

 

D-[JSOI2007]文本生成器

独特的串【算法赛】

一眼丁真了, 和文本生成器基本上一样, 就是 Trie 图上的 dp 稍有改动

我们用 dp[ i ][ j ][ k ] 表示已经生成了长度 i、当前位于 trie 图上 j 节点的方案数, 其中 k ( 0 / 1 ) 表示是否已经匹配到过某个结束节点, 直接转移就行

边界不太懂, 玄学设了几个数组大小试了十来次才过, 并且不能用 map ,map会 T 两个点, 直接换成数组就行

#include <bits/stdc++.h>
using namespace std;
#define endl "\n"
typedef long long ll;

// Size bound for the automaton's per-node arrays.
const int N = 4e5 + 100;
// Modulus applied to every DP count.
const int mod = 998244353;

// DP table used by Aho_Corasick_Automaton::get: f[len][node][k] counts strings
// of length len that end at trie-graph node `node`; k is 1 once a terminal
// node has been entered. The extents (5010 x 1010) are much smaller than N and
// are not checked against the requested length or the node count.
int f[5010][1010][2];

// Aho-Corasick automaton over the letters 'a'..'z' plus the counting DP that
// runs on its trie graph (get). After build(), end[x] is the number of
// inserted words that are suffixes of the string spelled by node x.
struct Aho_Corasick_Automaton {
    //basic
    int nxt[N][26], fail[N];  // transitions and fail links
    int root, tot;            // root id, last allocated node id
    //special
    int cnt[N], end[N];       // cnt is only reset here; end counts words ending at a node

    // g stores the fail tree
    vector<int> g[N];

    // Resets the automaton to a lone root, including the root's word counter.
    void clear() {
        memset(nxt[0], 0, sizeof nxt[0]);
        root = tot = 0;
        cnt[0] = fail[0] = end[0] = 0;
        g[0].clear();
    }
    // Allocates a clean node. end[] is reset as well, otherwise word counts
    // from an earlier dictionary would survive a clear().
    int newnode() {
        tot++;
        memset(nxt[tot], 0, sizeof nxt[tot]);
        cnt[tot] = fail[tot] = end[tot] = 0;
        g[tot].clear();
        return tot;
    }
    // Inserts str and counts it at its last node.
    void insert(const string &str) {
        int now = root;
        for (char c : str) {
            const int id = c - 'a';
            if (!nxt[now][id]) nxt[now][id] = newnode();
            now = nxt[now][id];
        }
        end[now]++;
    }
    // Computes fail links in BFS order, fills missing transitions (trie graph),
    // accumulates end[] along fail links and builds the fail tree.
    void build() {
        queue<int> q;
        for (int i = 0; i < 26; i++) {
            if (nxt[root][i]) q.push(nxt[root][i]);
        }
        while (!q.empty()) {
            int head = q.front(); q.pop();
            for (int i = 0; i < 26; i++) {
                // trie graph: a missing edge is redirected to the fail node's edge
                int tmp = nxt[head][i];
                if(!tmp) nxt[head][i] = nxt[fail[head]][i];
                else{
                    fail[tmp] = nxt[fail[head]][i];
                    // the fail node is shallower, so its total is already final
                    end[tmp] += end[fail[tmp]];
                    q.push(tmp);
                }
            }
        }
        // build the fail tree: fail[i] is the parent of i
        for (int i = 1; i <= tot; i++) g[fail[i]].push_back(i);
    }
    // Prints, modulo mod, the number of length-m strings whose walk never
    // enters a node with end >= 2 and enters a node with end == 1 at most once.
    // NOTE(review): m and tot must fit the fixed extents of the global table f;
    // nothing here checks that.
    void get(int m) {
        // Clear the slice that is about to be used so a second call does not
        // add to counts left over from the first one.
        for (int len = 0; len <= m; len++) {
            for (int i = 0; i <= tot; i++) f[len][i][0] = f[len][i][1] = 0;
        }
        f[0][0][0] = 1;
        for (int len = 1; len <= m; len++) {
            for (int i = 0; i <= tot; i++) {
                const int none = f[len - 1][i][0];  // no terminal entered yet
                const int one = f[len - 1][i][1];   // exactly one terminal entered
                if (!none && !one) continue;        // nothing to propagate
                for (int j = 0; j < 26; j++) {
                    int x = nxt[i][j];
                    if (end[x] >= 2) continue;
                    if (!end[x]) {
                        f[len][x][0] = (f[len][x][0] + none) % mod;
                        f[len][x][1] = (f[len][x][1] + one) % mod;
                    } else {
                        // entering a terminal is only allowed from the k = 0 layer
                        f[len][x][1] = (f[len][x][1] + none) % mod;
                    }
                }
            }
        }
        int ans = 0;
        for (int i = 0; i <= tot; i++) {
            if (end[i] >= 2) continue;
            ans = (ans + (f[m][i][0] + f[m][i][1]) % mod) % mod;
        }
        cout << ans << endl;
    }
} ACAM;

// Reads n dictionary words and the target length m, then prints the DP answer.
void solve() {
    int n, m;
    cin >> n >> m;
    for (int remaining = n; remaining > 0; --remaining) {
        string word;
        cin >> word;
        ACAM.insert(word);
    }
    ACAM.build();
    ACAM.get(m);
}

// Entry point: unties the iostreams and runs the single test case.
int main() {
    ios::sync_with_stdio(false);
    cin.tie(0);
    cout.tie(0);

    solve();

    return 0;
}
View Code

 P2292 [HNOI2004] L 语言

首先注意到字典串长度最大为20

那么我们处理出AC自动机上每个节点 fail 能经过哪些后缀节点把长度状压一下,再去 dp ,f [ i ] 表示文本串中位置 i 是否合法

我们由当前文本串匹配到的节点预处理出来的后缀长度进行转移, 但是这样只能拿 80 分原因是有个 20 的常数

考虑如何优化这个常数, 又要用到字典串最大只会有 20 的长度, 所以我们扫文本串时就相当于一个长度为 20 的区间在不断滑动 (对应状态整体左移一位), 我们去状压维护这个区间内的合法状态即可

但是不太懂怎么状压,抄的别人的,但是队友会状压,相信队友!

80分的 dp 代码

#include <bits/stdc++.h>
using namespace std;
#define endl "\n"
typedef long long ll;

// Size bound for the automaton's per-node arrays.
const int N = 2e6 + 100;
// Aho-Corasick automaton over the letters 'a'..'z'. For every node, f is a
// bitmask of word lengths: bit (j - 1) is set when an inserted word of length
// j is a suffix of the node's string. query() uses it for a segmentation DP.
struct Aho_Corasick_Automaton {
    //basic
    int nxt[N][26], fail[N];    // transitions and fail links
    int root, tot;              // root id, last allocated node id
    //special
    int cnt[N], f[N], dep[N];   // cnt is only reset here; f = length mask; dep = node depth

    // Resets the automaton to a lone root.
    void clear() {
        memset(nxt[0], 0, sizeof nxt[0]);
        root = tot = 0;
    }
    // Allocates a clean child of node x, one level deeper than x.
    int newnode(int x) {
        tot++; dep[tot] = dep[x] + 1;
        memset(nxt[tot], 0, sizeof nxt[tot]);
        cnt[tot] = fail[tot] = f[tot] = 0;
        return tot;
    }
    // Inserts str and marks its length at its last node. num is unused.
    // Only lengths 1..20 are recorded: query() never looks at other bits, and
    // an empty word would otherwise shift by -1, which is undefined.
    void insert(const string &str, int num) {
        (void)num;
        int now = root;
        for (char c : str) {
            const int id = c - 'a';
            if (!nxt[now][id]) nxt[now][id] = newnode(now);
            now = nxt[now][id];
        }
        if (dep[now] >= 1 && dep[now] <= 20) f[now] |= 1 << (dep[now] - 1);
    }
    // Computes fail links in BFS order, fills missing transitions (trie graph)
    // and merges each node's length mask with that of its fail node.
    void build() {
        queue<int> q;
        for (int i = 0; i < 26; i++) {
            if (nxt[root][i]) q.push(nxt[root][i]);
        }
        while (!q.empty()) {
            int head = q.front(); q.pop();
            for (int i = 0; i < 26; i++) {
                // trie graph: a missing edge is redirected to the fail node's edge
                int tmp = nxt[head][i];
                if(!tmp) nxt[head][i] = nxt[fail[head]][i];
                else{
                    fail[tmp] = nxt[fail[head]][i];
                    q.push(tmp);
                    // the fail node is shallower, so its mask is already final
                    f[tmp] |= f[fail[tmp]];
                }
            }
        }
    }
    // Prints the length of the longest prefix of s that can be split into
    // inserted words. dp[i] is 1 when the first i characters can be split.
    void query(const string &s) {
        const int len = static_cast<int>(s.size());
        vector<int> dp(len + 1, 0); dp[0] = 1;
        int now = root, ans = 0;
        for (int i = 1; i <= len; i++) {
            now = nxt[now][s[i - 1] - 'a'];
            // try every word length j that ends here; j <= i keeps dp[i - j] in range
            for (int j = 1; j <= 20 && j <= i && !dp[i]; j++) {
                if ((f[now] >> (j - 1) & 1) && dp[i - j]) dp[i] = 1;
            }
            if (dp[i]) ans = i;
        }
        cout << ans << endl;
    }    
} ACAM;

// Reads n dictionary words and m texts; prints one answer line per text.
void solve() {
    int n, m;
    cin >> n >> m;
    for (int idx = 1; idx <= n; ++idx) {
        string word;
        cin >> word;
        ACAM.insert(word, idx);
    }
    ACAM.build();
    for (; m != 0; --m) {
        string text;
        cin >> text;
        ACAM.query(text);
    }
}

// Entry point: unties the iostreams and runs the single test case.
signed main() {
    ios::sync_with_stdio(false);
    cin.tie(0);
    cout.tie(0);

    solve();

    return 0;
}
View Code

优化常数后的代码

#include <bits/stdc++.h>
using namespace std;
#define endl "\n"
typedef long long ll;

// Size bound for the automaton's per-node arrays.
const int N = 2e6 + 100;
// Aho-Corasick automaton over the letters 'a'..'z'. For every node, f is a
// bitmask of word lengths: bit j is set when an inserted word of length j is a
// suffix of the node's string. query() keeps the recent DP states in one
// integer and tests them against that mask with a single AND.
struct Aho_Corasick_Automaton {
    //basic
    int nxt[N][26], fail[N];    // transitions and fail links
    int root, tot;              // root id, last allocated node id
    //special
    int cnt[N], f[N], dep[N];   // cnt is only reset here; f = length mask; dep = node depth

    // Resets the automaton to a lone root.
    void clear() {
        for (int c = 0; c < 26; ++c) nxt[0][c] = 0;
        tot = 0;
        root = 0;
    }
    // Allocates a clean child of node x, one level deeper than x.
    int newnode(int x) {
        const int id = ++tot;
        dep[id] = dep[x] + 1;
        for (int c = 0; c < 26; ++c) nxt[id][c] = 0;
        cnt[id] = 0;
        fail[id] = 0;
        f[id] = 0;
        return id;
    }
    // Inserts str and sets the bit for its length at its last node. num is unused.
    void insert(string str, int num) {
        int cur = root;
        for (char ch : str) {
            int &slot = nxt[cur][ch - 'a'];
            if (slot == 0) slot = newnode(cur);
            cur = slot;
        }
        f[cur] |= (1ll << (str.size()));
    }
    // Computes fail links in BFS order, fills missing transitions (trie graph)
    // and merges each node's length mask with that of its fail node.
    void build() {
        queue<int> bfs;
        for (int c = 0; c < 26; ++c) {
            const int first = nxt[root][c];
            if (first != 0) bfs.push(first);
        }
        while (!bfs.empty()) {
            const int u = bfs.front();
            bfs.pop();
            const int fu = fail[u];
            for (int c = 0; c < 26; ++c) {
                const int via_fail = nxt[fu][c];
                int &edge = nxt[u][c];
                if (edge == 0) {
                    // trie graph: a missing edge is redirected to the fail node's edge
                    edge = via_fail;
                    continue;
                }
                fail[edge] = via_fail;
                f[edge] |= f[via_fail];
                bfs.push(edge);
            }
        }
    }
    // Prints the length of the longest prefix of s that can be split into
    // inserted words. Bit k of `reach` says whether the prefix ending k
    // characters before the current position can be split; only bits 0..20
    // are kept.
    void query(string s) {
        const int window = (1 << 21) - 1;
        int state = root, best = 0, reach = 1;
        int pos = 0;
        for (char ch : s) {
            ++pos;
            state = nxt[state][ch - 'a'];
            const int shifted = reach << 1;
            const bool hit = (shifted & f[state]) != 0;
            if (hit) best = pos;
            reach = (shifted | (hit ? 1 : 0)) & window;
        }
        cout << best << endl;
    }    
} ACAM;

// Reads n dictionary words and m texts; prints one answer line per text.
void solve() {
    int n, m;
    cin >> n >> m;
    int idx = 0;
    while (idx < n) {
        ++idx;
        string word;
        cin >> word;
        ACAM.insert(word, idx);
    }
    ACAM.build();
    for (; m != 0; --m) {
        string text;
        cin >> text;
        ACAM.query(text);
    }
}

// Entry point: unties the iostreams and runs the single test case.
signed main() {
    ios::sync_with_stdio(false);
    cin.tie(0);
    cout.tie(0);

    solve();

    return 0;
}
View Code

 


 

 

 Trie 图性质

F-病毒

Trie 图上(不是 fail 树)不断的跳, 如果出现了不经过终止节点的环则满足条件

我们可以这样想这题, 假设我们得到了一个无限长的串, 那么我们拿他去和 Trie 图上节点匹配, 如果出现过那么我们在跳的途中一定是走到一个终止节点了

那么为了不走到这个节点, 我们要不通过终止节点的一直在一个环上走就满足了

View Code

 

posted @ 2024-02-16 23:32  zhujio  阅读(28)  评论(0)    收藏  举报