# AC自动机

## 使用场景

AC自动机是一种著名的多模式匹配算法。

## 算法思想与流程

fail[x] 指向结点 p，表示 p 所代表的串是 x 所代表的串在 Trie 中出现的最长真后缀。

void procFail(int * q) {
for (int i(0); i < 26; ++i) {
if (kids[0][i]) q[tail++] = kids[0][i];
}

for (int i(0); i < 26; ++i) {
if (kids[x][i]) {
fail[kids[x][i]] = kids[fail[x]][i];
q[tail++] = kids[x][i];
} else kids[x][i] = kids[fail[x]][i];
}
} // procFail end
}


## 匹配的判断

// Adds pattern `s` to the trie. Starting at the root (node 0), each character
// is mapped to a child index via `ch - 'a'`; missing children are allocated
// with the next node number (++usage). cnt of the final node is incremented.
void insert(string &s) {
    int node = 0;
    for (char ch : s) {
        const int idx = ch - 'a';
        if (kids[node][idx] == 0) {
            kids[node][idx] = ++usage;
        }
        node = kids[node][idx];
    }
    ++cnt[node];
}


// Runs `s` through the automaton and returns the sum of cnt over every node
// reachable through fail links from each visited state.
// A node's cnt is set to -1 once it has been added, so each node contributes
// at most once and the fail-chain walk stops at the first consumed node.
// Note: this overwrites cnt, so the automaton cannot be queried twice.
int ACMatch(string & s) {
    int total = 0;
    int state = 0;
    for (char ch : s) {
        state = kids[state][ch - 'a'];
        int t = state;
        while (t != 0 && cnt[t] != -1) {
            total += cnt[t];
            cnt[t] = -1;
            t = fail[t];
        }
    }
    return total;
}


## 失配树的应用

// Feeds `s` through the automaton and increments cnt of every state reached
// (one increment per character). The fail-chain contributions are not added
// here; see ACCount for the accumulation over fail links.
inline void ACMatch(string &s) {
    int state = 0;
    for (const char ch : s) {
        const int idx = ch - 'a';
        state = kids[state][idx];
        cnt[state] += 1;
    }
}

// Pushes the visit counts along the fail links: walks q[usage] down to q[1]
// and adds cnt of each node to cnt of its fail target.
// `q` is expected to hold the node order produced by procFail.
inline void ACCount(int * q) {
    for (int idx = usage; idx != 0; --idx) {
        const int node = q[idx];
        cnt[fail[node]] += cnt[node];
    }
}


// Adds pattern `s` to the trie, allocating missing nodes through newNode(),
// and records the node where the pattern ends in pos[i].
void insert(string &s, int i) {
    int node = 0;
    for (char ch : s) {
        const int idx = ch - 'a';
        if (kids[node][idx] == 0) {
            const int fresh = newNode();
            kids[node][idx] = fresh;
        }
        node = kids[node][idx];
    }
    pos[i] = node;
}

// Prints cnt[pos[i]] for i = 1..n, one value per line.
inline void ACOutput(int n) {
    int i = 1;
    while (i <= n) {
        const int endNode = pos[i];
        cout << cnt[endNode] << '\n';
        ++i;
    }
}


// Propagates len along fail links in queue order: for q[1]..q[usage], a node
// takes the larger of its own len and the len of its fail target.
// `q` is expected to hold the node order produced by procFail.
inline void ACprepare(int * q) {
    for (int idx = 1; idx <= usage; ++idx) {
        const int node = q[idx];
        const int target = fail[node];
        if (len[node] < len[target]) {
            len[node] = len[target];
        }
    }
}


// Masks matched ranges of `s` with '*'. After consuming s[i] the automaton is
// in state p; if len[p] is non-zero, the len[p] characters ending at index i
// are overwritten.
// NOTE(review): discrete() is defined elsewhere; presumably it maps a
// character to a child index. Confirm against its definition.
inline void ACclean(string &s) {
    int state = 0;
    const unsigned total = s.size();
    for (unsigned last = 0; last < total; ++last) {
        state = kids[state][discrete(s[last])];
        if (!len[state]) continue;
        unsigned j = last - len[state] + 1;
        while (j <= last) {
            s[j] = '*';
            ++j;
        }
    }
}


## 对状态的理解

string sub, pat;
cin >> sub >> pat;
insert(sub), procFail(Q);

int p = 0;
for (int i(0), ie = pat.size(); i < ie; ++i) {
p = kids[cps[ci]][pat[i] - 'a'];
cps[++ci] = p, ccs[ci] = pat[i];
if (match[p]) ci -= sub.size();
}

for (int i = 1; i <= ci; ++i) {
putchar(ccs[i]);
}


#include <algorithm>
#include <cstdio>
#include <iostream>
#include <string>

using namespace std;
const int N = 1e6 + 7;

// cnt[x]: visit counter of trie node x; pos[i]: node where the i-th pattern ends.
int res[N], cnt[N], pos[N];

/// Aho-Corasick automaton over the lowercase alphabet 'a'..'z'.
///
/// Node 0 is the root. Typical use: insert() every pattern, procFail() once,
/// ACMatch() the text, ACCount() to accumulate along fail links, then
/// ACOutput() to print per-pattern counts.
class ACAutomaton {
private:
    int kids[N][26];            // child / transition table
    int fail[N], id[N], usage;  // usage: index of the last allocated node
public:
    ACAutomaton() : usage(0) {
    }

    /// Allocates the next node, resets its row and per-node data, and
    /// returns its index.
    inline int newNode() {
        std::fill_n(kids[++usage], 26, 0);
        cnt[usage] = fail[usage] = id[usage] = 0;
        return usage;
    }

    /// Adds pattern `s` to the trie and stores its end node in pos[i].
    void insert(const std::string &s, int i) {
        int p(0);
        for (int c : s) {
            c -= 'a';
            if (!kids[p][c]) kids[p][c] = newNode();
            p = kids[p][c];
        }
        pos[i] = p;
    }

    /// Builds fail links breadth-first and fills in missing transitions.
    ///
    /// The queue is 1-based: non-root nodes are stored in q[1..usage] in BFS
    /// order, which is exactly the range ACCount() walks.
    ///
    /// @param q  Caller-provided buffer with at least usage + 1 slots.
    void procFail(int * q) {
        int head(1), tail(1);

        // Depth-1 nodes fall back to the root.
        for (int i(0); i < 26; ++i) {
            if (kids[0][i]) {
                fail[kids[0][i]] = 0;
                q[tail++] = kids[0][i];
            }
        }

        while (head < tail) {
            const int x = q[head++];
            for (int i(0); i < 26; ++i) {
                if (kids[x][i]) {
                    // fail[x] was dequeued earlier, so its row is complete.
                    fail[kids[x][i]] = kids[fail[x]][i];
                    q[tail++] = kids[x][i];
                } else kids[x][i] = kids[fail[x]][i];
            }
        }
    } // procFail end

    /// Dumps every node (cnt, fail target and its 26 transitions) to stdout.
    void debug() {
        for (int i = 0; i <= usage; ++i) {
            std::printf("node %d (cnt %d) fail to %d:\n\t", i, cnt[i], fail[i]);
            for (int j(0); j < 26; ++j) {
                std::printf("%d ", kids[i][j]);
            } std::puts("");
        }
    }

    /// Runs `s` through the automaton, incrementing cnt of each state reached.
    inline void ACMatch(const std::string &s) {
        int p(0);
        for (char c : s) {
            p = kids[p][c - 'a'];
            ++cnt[p];
        }
    }

    /// Adds each node's cnt to its fail target, deepest nodes first
    /// (reverse of the order procFail() stored in q).
    inline void ACCount(int * q) {
        for (int i = usage; i; --i) {
            cnt[fail[q[i]]] += cnt[q[i]];
        }
    }

    /// Prints cnt[pos[i]] for i = 1..n, one per line.
    inline void ACOutput(int n) {
        for (int i = 1; i <= n; ++i) {
            std::cout << cnt[pos[i]] << '\n';
        }
    }

    /// Resets the automaton to a single, empty root node.
    void clear() {
        usage = -1;
        newNode(); // clear 0
    }
} ac;

int Q[N];
string s;

int main() {
cin.tie(0)->sync_with_stdio(false);

int n;
cin >> n;
for (int i = 1; i <= n; ++i) {
cin >> s;
ac.insert(s, i);
} ac.procFail(Q);

cin >> s;
ac.ACMatch(s);
ac.ACCount(Q);
ac.ACOutput(n);
return 0;
}


posted @ 2023-03-26 14:14  jeefy  阅读(101)  评论(0)  编辑  收藏  举报