【模板】AC自动机
AC快乐机主要用于多个模式串与一个字符串做匹配
构建是将模式串构成一棵树
将所有模式串插入Trie(构建方式同普通Trie),匹配文本串的时候借用KMP的思想:失配时不从头开始,而是从Trie上的另一个节点继续匹配
i匹配失败后继续从j开始匹配,j是i的Fail指针(失配指针)
首先,每个点i的Fail指针指向的节点深度一定比i小(Fail指向的是i所代表的字符串在Trie中能找到的最长真后缀所对应的节点)
第一层Fail指向root,也可以构建一个虚节点0号节点,将0号节点所有的儿子都指向root(root编号为1),然后root的fail指向0节点
void getFail(){ for(int i = 0; i < 26;i++){ trie[0].son[i] = 1;//初始化0的所有儿子都为 1 } q.push(1);//将根节点压入队列 trie[1].fail = 0; while(!q.empty()){ int u = q.front(); q.pop(); for(int i = 0; i < 26; i++){//遍历所有子节点 int v = trie[u].son[i];//处理u的儿子i的fail int Fail = trie[u].fail;//与v值相同的点,就是fafail if(!v){//不存在该节点 trie[u].son[i] = trie[Fail].son[i]; continue; } trie[v].fail = trie[Fail].son[i]; q.push(v);//存在实店压入队列 } } }
查询操作
为了避免重复,每经过一个点就打标记为-1,下次经过就并不需要重复计算了
// Walks the automaton over the C string `s` and returns the sum of the `flag`
// values of every node reached, either directly or by following fail links.
// Each visited node has its flag overwritten with -1 so it is added at most once;
// as a consequence the automaton's flags are consumed by the call.
// Uses the `trie` array declared outside this snippet; characters are mapped with `c - 'a'`.
int query(char* s) {
    int total = 0;
    int state = 1;  // start at the root
    const int length = strlen(s);

    for (int pos = 0; pos < length; ++pos) {
        const int next = trie[state].son[s[pos] - 'a'];

        // Follow the fail chain until the root/virtual node or an already-visited node.
        for (int node = next; node > 1 && trie[node].flag != -1; node = trie[node].fail) {
            total += trie[node].flag;
            trie[node].flag = -1;  // mark as visited
        }

        state = next;  // advance along the transition for this character
    }
    return total;
}
完整代码
// Aho-Corasick automaton template.
// Reads a pattern count t, then t patterns, then one text from standard input,
// and prints how many of the inserted patterns (duplicates counted separately)
// occur in the text.
// All strings must consist of the lowercase letters 'a'..'z' only: characters are
// mapped to child slots with `c - 'a'` and are not range-checked.
#include <iostream>
#include <queue>
#include <algorithm>
#include <string>  // std::string and its operator>> are declared here
using namespace std;

const int maxn = 1000010;  // capacity of the node pool

struct Trie {
    int son[26];  // child per letter; 0 means "no child" until get_fail() fills the slot
    int flag;     // number of patterns ending at this node; -1 once counted by query()
    int fail;     // fail link
    // `son` is not assigned here; the global array below relies on the
    // zero-initialization that objects with static storage duration receive.
    Trie() {
        flag = 0;
        fail = -1;
    }
} trie[maxn];

int cnt;  // index of the last allocated node; main() must set it to 1 (the root) first

// Inserts pattern `s` into the trie, allocating nodes as needed, and increments
// the pattern counter of the node where it ends.
void insert(const string& s) {
    int p = 1;  // cursor, starts at the root
    for (string::size_type i = 0; i < s.length(); i++) {
        int word = s[i] - 'a';  // slot of the current character
        if (!trie[p].son[word]) trie[p].son[word] = ++cnt;  // allocate a missing child
        p = trie[p].son[word];
    }
    trie[p].flag++;
}

// Computes all fail links breadth-first and redirects every missing child slot
// to the transition of the parent's fail node.
void get_fail() {
    queue<int> q;
    for (int i = 0; i < 26; i++) trie[0].son[i] = 1;  // virtual node 0 leads to the root on any letter
    q.push(1);
    trie[1].fail = 0;  // the root falls back to the virtual node
    while (!q.empty()) {
        int fa = q.front();
        q.pop();
        for (int i = 0; i < 26; i++) {
            int now = trie[fa].son[i];
            int faFail = trie[fa].fail;
            if (!now) {
                // No such child: reuse the transition of the parent's fail node.
                trie[fa].son[i] = trie[faFail].son[i];
                continue;
            }
            // Real child: its fail link is the parent's fail node's transition on the same letter.
            trie[now].fail = trie[faFail].son[i];
            q.push(now);
        }
    }
}

// Returns the number of inserted patterns that occur in `s`.
// Visited nodes are marked with flag = -1 so each is counted once, which means
// the automaton can answer only one query after it has been built.
int query(const string& s) {
    int ans = 0;
    int p = 1;  // current state, starts at the root
    for (string::size_type i = 0; i < s.size(); i++) {
        int now = s[i] - 'a';
        int k = trie[p].son[now];
        while (k > 1 && trie[k].flag != -1) {  // stop at the root or at an already-counted node
            ans += trie[k].flag;
            trie[k].flag = -1;  // mark as counted
            k = trie[k].fail;   // continue with the next shorter suffix
        }
        p = trie[p].son[now];  // advance along the transition for this character
    }
    return ans;
}

int main() {
    cnt = 1;
    int t;
    cin >> t;
    string str;
    for (int i = 0; i < t; i++) {
        cin >> str;
        insert(str);
    }
    string word;
    cin >> word;
    get_fail();
    cout << query(word) << '\n';
}
多次搜索:力扣https://leetcode-cn.com/problems/multi-search-lcci/
给定一个较长字符串big和一个包含较短字符串的数组smalls,设计一个方法,根据smalls中的每一个较短字符串,对big进行搜索。输出smalls中的字符串在big里出现的所有位置positions,其中positions[i]为smalls[i]出现的所有位置。
示例:
输入:
big = "mississippi"
smalls = ["is","ppi","hi","sis","i","ssippi"]
输出: [[1,4],[8],[],[3],[1,4,7,10],[5]]
// Multi-pattern search with an Aho-Corasick automaton: for every string in
// `smalls`, collects all start positions at which it occurs in `big`.
// Strings must consist of the lowercase letters 'a'..'z' only: characters are
// mapped to child slots with `c - 'a'` and are not range-checked.
class Solution {
public:
    // One automaton node.
    struct Trie {
        int son[26] = {};  // child per letter; 0 means "no child" until get_fail() fills the slot
        int flag = 0;      // 1-based index of the first pattern ending here, 0 if none
        int fail = -1;     // fail link
    };

    // Growable node pool: index 0 is the virtual node, index 1 is the root.
    std::vector<Trie> trie = std::vector<Trie>(2);
    int cnt = 1;  // index of the last allocated node
    // duplicate_of[i] is the index of an earlier identical pattern, or -1 when
    // pattern i is the first of its kind.
    std::vector<int> duplicate_of;

    // Inserts pattern `s` with index `a` (expected to be >= 0).
    // Must be called before get_fail(), which overwrites the empty child slots.
    void insert(const std::string& s, int a) {
        int p = 1;
        for (std::string::size_type i = 0; i < s.length(); i++) {
            const int word = s[i] - 'a';
            if (!trie[p].son[word]) {
                // Grow first, then index: emplace_back may reallocate the pool.
                trie.emplace_back();
                cnt = static_cast<int>(trie.size()) - 1;
                trie[p].son[word] = cnt;
            }
            p = trie[p].son[word];
        }
        if (trie[p].flag <= 0) {
            trie[p].flag = a + 1;
            return;
        }
        // The same string was inserted before: remember the alias instead of
        // overwriting the node, so both indices receive the match positions.
        const int first = trie[p].flag - 1;
        if (a < 0 || a == first) return;
        if (a >= static_cast<int>(duplicate_of.size())) duplicate_of.resize(a + 1, -1);
        duplicate_of[a] = first;
    }

    // Computes all fail links breadth-first and redirects every missing child
    // slot to the transition of the parent's fail node.
    void get_fail() {
        std::queue<int> q;
        for (int i = 0; i < 26; i++) trie[0].son[i] = 1;  // virtual node leads to the root
        q.push(1);
        trie[1].fail = 0;
        while (!q.empty()) {
            const int fa = q.front();
            q.pop();
            const int faFail = trie[fa].fail;
            for (int i = 0; i < 26; i++) {
                const int now = trie[fa].son[i];
                if (!now) {
                    trie[fa].son[i] = trie[faFail].son[i];
                    continue;
                }
                trie[now].fail = trie[faFail].son[i];
                q.push(now);
            }
        }
    }

    // Scans `big` and returns, for every index of `smalls`, the ascending list of
    // start positions of that pattern. Requires insert() and get_fail() to have run.
    std::vector<std::vector<int>> query(std::string big, std::vector<std::string>& smalls) {
        std::vector<std::vector<int>> ans(smalls.size());
        int p = 1;
        for (std::string::size_type i = 0; i < big.length(); i++) {
            p = trie[p].son[big[i] - 'a'];
            // Every node on the fail chain is a suffix of the text read so far.
            for (int k = p; k > 1; k = trie[k].fail) {
                if (trie[k].flag <= 0) continue;
                const int id = trie[k].flag - 1;
                const int index = static_cast<int>(i) - static_cast<int>(smalls[id].size()) + 1;
                ans[id].push_back(index);
            }
        }
        // Identical patterns share the positions found for their first occurrence.
        for (std::vector<int>::size_type i = 0; i < ans.size() && i < duplicate_of.size(); i++) {
            const int first = duplicate_of[i];
            if (first >= 0 && first < static_cast<int>(ans.size())) ans[i] = ans[first];
        }
        return ans;
    }

    // Entry point: rebuilds the automaton from `smalls` and searches `big`.
    // The state is reset first, so the same object can be used repeatedly.
    std::vector<std::vector<int>> multiSearch(std::string big, std::vector<std::string>& smalls) {
        trie.assign(2, Trie());
        cnt = 1;
        duplicate_of.assign(smalls.size(), -1);
        const int n = static_cast<int>(smalls.size());
        for (int i = 0; i < n; i++) {
            insert(smalls[i], i);
        }
        get_fail();
        return query(big, smalls);
    }
};
另一种版本(更适合我,字典树用指针构造),这种更快
// Multi-pattern search with a pointer-based trie: every pattern in `smalls` is
// inserted into the trie, then the trie is walked from each start position of `big`.
// Strings must consist of the lowercase letters 'a'..'z' only: characters are
// mapped to child slots with `c - 'a'` and are not range-checked.
class Solution {
public:
    // Trie node; a node owns its children and frees them on destruction
    // (recursion depth equals the length of the longest inserted pattern).
    struct Trie {
        int sid;          // index of the first pattern ending here, -1 if none
        Trie* child[26];  // child per letter, nullptr when absent
        Trie() : sid(-1), child() {}
        ~Trie() {
            for (int i = 0; i < 26; i++) delete child[i];
        }
        Trie(const Trie&) = delete;
        Trie& operator=(const Trie&) = delete;
    };

    Trie* root = new Trie();

    Solution() = default;
    ~Solution() { delete root; }
    // The trie is owned through a raw pointer, so copying would free it twice.
    Solution(const Solution&) = delete;
    Solution& operator=(const Solution&) = delete;

    // Inserts `word` as pattern number `s` (expected to be >= 0). If the same
    // string was inserted before, the node keeps the first index and `s` is
    // recorded as its duplicate.
    void insert(const std::string& word, int s) {
        Trie* node = root;
        for (std::string::size_type i = 0; i < word.length(); i++) {
            Trie*& next = node->child[word[i] - 'a'];
            if (next == nullptr) next = new Trie();
            node = next;
        }
        if (node->sid == -1) {
            node->sid = s;
            return;
        }
        if (s < 0 || s == node->sid) return;
        if (s >= static_cast<int>(duplicate_of_.size())) duplicate_of_.resize(s + 1, -1);
        duplicate_of_[s] = node->sid;
    }

    int n, m;  // number of patterns and length of the text of the last multiSearch() call

    // Walks the trie along `word` and appends `bid` to ans[id] for every pattern id
    // that is a prefix of `word`. Duplicate patterns are resolved by multiSearch().
    void search(const std::string& word, std::vector<std::vector<int>>& ans, int bid) {
        matchFrom(word, 0, ans, bid);
    }

    // Returns, for every index of `smalls`, the ascending list of positions at
    // which that string occurs in `big`; empty strings get an empty list.
    // The trie is rebuilt on every call, so the object can be reused.
    std::vector<std::vector<int>> multiSearch(std::string big, std::vector<std::string>& smalls) {
        n = smalls.size(), m = big.length();

        // Start from an empty trie so indices from an earlier call cannot leak in.
        Trie* fresh = new Trie();
        delete root;
        root = fresh;
        duplicate_of_.assign(n, -1);

        std::vector<std::vector<int>> ans(n, std::vector<int>{});
        for (int i = 0; i < n; i++) {
            if (smalls[i].size() == 0) continue;
            insert(smalls[i], i);
        }
        // Match in place instead of copying a suffix of `big` for every position.
        for (int i = 0; i < m; i++) {
            matchFrom(big, i, ans, i);
        }
        // Identical patterns share the positions found for their first occurrence.
        for (int i = 0; i < n; i++) {
            if (duplicate_of_[i] != -1) ans[i] = ans[duplicate_of_[i]];
        }
        return ans;
    }

private:
    // duplicate_of_[i] is the index of an earlier identical pattern, or -1.
    std::vector<int> duplicate_of_;

    // Walks the trie along text[start..] and records `bid` for every pattern
    // that ends on the path, stopping at the first missing child.
    void matchFrom(const std::string& text, std::string::size_type start,
                   std::vector<std::vector<int>>& ans, int bid) const {
        const Trie* node = root;
        for (std::string::size_type i = start; node != nullptr; i++) {
            if (node->sid != -1) ans[node->sid].push_back(bid);
            if (i >= text.size()) break;
            node = node->child[text[i] - 'a'];
        }
    }
};
推荐相关视频:
b站:星垂月朦胧
https://www.bilibili.com/video/BV1Nk4y1B7SL?p=1

浙公网安备 33010602011771号