【模板】AC自动机

 

AC自动机(戏称“AC快乐机”)主要用于多个模式串在同一个文本串中的匹配

构建时先将所有模式串插入,建成一棵Trie树

模式串按普通Trie的方式插入(构建方式同Trie),文本串则在这棵Trie上进行匹配;匹配的时候借用KMP的思想,失配后不必从头再来,而是从Trie上的另一个点继续开始匹配

在节点i匹配失败后,跳到节点j继续匹配,其中j是i的Fail指针(失配指针)所指向的节点

 

首先,每个点i的Fail指针指向的节点深度一定比i小(Fail指向的节点所代表的串,是i所代表的串在Trie中出现的最长真后缀)

第一层Fail指向root,也可以构建一个虚节点0号节点,将0号节点所有的儿子都指向root(root编号为1),然后root的fail指向0节点

// Builds the fail links of the automaton with a breadth-first pass over the
// trie. Node 1 is the root and node 0 is a virtual node placed above it.
// While walking, every missing transition of a node is redirected to the
// matching transition of its fail node, so later lookups never have to
// follow fail links explicitly.
// Uses the externally defined `trie` array and queue `q`.
void getFail(){
    // Every edge of the virtual node leads to the root, which lets the root
    // and its direct children be handled by the general rule below.
    for(int c = 0; c < 26; ++c)
        trie[0].son[c] = 1;

    trie[1].fail = 0;
    q.push(1);
    while(!q.empty()){
        const int cur = q.front();
        q.pop();
        const int fallback = trie[cur].fail;  // fail node of the node being expanded
        for(int c = 0; c < 26; ++c){
            const int child = trie[cur].son[c];
            if(child){
                // Real child: its fail link is where the parent's fail node
                // goes on the same character.
                trie[child].fail = trie[fallback].son[c];
                q.push(child);
            }else{
                // No child: borrow the transition of the fail node.
                trie[cur].son[c] = trie[fallback].son[c];
            }
        }
    }
}

查询操作

 

为了避免重复统计,每统计完一个点就把它的flag标记为-1,下次再沿Fail指针走到该点时直接停止,不需要重复计算

// Feeds the text `s` through the automaton and returns the sum of the `flag`
// counters of all nodes reached, each node being counted at most once.
// A counted node has its flag overwritten with -1, so the automaton's
// counters are consumed by this call.
// Each character of `s` is mapped to a transition index with `- 'a'`.
int query(char* s){
    const int len = strlen(s);
    int state = 1;    // current automaton node, starting at the root
    int matched = 0;
    for(int pos = 0; pos < len; ++pos){
        state = trie[state].son[s[pos] - 'a'];
        // Walk the fail chain from the new state; stop at the root or at the
        // first node that has already been counted.
        for(int node = state; node > 1 && trie[node].flag != -1; node = trie[node].fail){
            matched += trie[node].flag;
            trie[node].flag = -1;
        }
    }
    return matched;
}

 完整代码

#include <iostream>
#include <queue>
#include <algorithm>

using namespace std;
// Capacity of the global node pool.
const int maxn = 1000010;

// One node of the automaton.
struct Trie{
    int son[26];  // son[c]: index of the node reached on letter 'a' + c, 0 if none
    int flag;     // number of inserted words that end at this node
    int fail;     // index of the fail node, -1 until get_fail() assigns it
    // All fields are initialised explicitly, so a node is valid no matter
    // what storage it lives in.
    Trie() : son{}, flag(0), fail(-1) {}
}trie[maxn];

// Index of the most recently allocated node. insert() starts every walk at
// node 1 (the root) and allocates with ++cnt, so the counter starts at 1.
int cnt = 1;
// Adds the word `s` to the trie, creating nodes as needed, and increments the
// word counter of the node where it ends.
// Each character is mapped to a child slot with `- 'a'`.
void insert(string s){
    int node = 1;  // start at the root
    for(char ch : s){
        int& next = trie[node].son[ch - 'a'];
        if(next == 0)
            next = ++cnt;  // allocate a fresh node for this edge
        node = next;
    }
    ++trie[node].flag;
}
void get_fail(){
    queue<int> q;//BFS
    for(int i = 0; i < 26; i++)        trie[0].son[i] = 1;//初始化0节点的所有儿子为1(root)
    q.push(1);
    trie[1].fail = 0;//1节点的fail指针为0
    while(!q.empty()){
        int fa = q.front();//取出父亲节点
        q.pop();
        for(int i = 0; i < 26; i++){
                int now = trie[fa].son[i];//当前遍历节点
                int faFail = trie[fa].fail;//父亲的fail
                if(!now){//不存在该节点
                    trie[fa].son[i] = trie[faFail].son[i];//跳转
                    continue;
                }
                //存在
                //当前的fail指向父亲fail指向的相同字符串
                trie[now].fail = trie[faFail].son[i];
                q.push(now);//now压入队列,继续遍历

        }
    }
}
// Runs the text `s` through the automaton and returns the sum of the `flag`
// counters of all nodes reached, counting every node at most once.
// Counted nodes get flag = -1, so the counters are consumed by this call.
int query(string s){
    int total = 0;
    int state = 1;  // current node, starting at the root (node 1)
    for(char ch : s){
        // Follow the (already completed) transition for this character.
        state = trie[state].son[ch - 'a'];
        // Collect along the fail chain; flag == -1 marks a node that was
        // already counted, at which point the walk stops.
        for(int node = state; node > 1 && trie[node].flag != -1; node = trie[node].fail){
            total += trie[node].flag;
            trie[node].flag = -1;
        }
    }
    return total;
}
int main(){
    cnt = 1;
    int t;
    cin>> t;
    string str;
    for(int i = 0; i < t;i++){
        cin>>str;
        insert(str);
    }
    string word;
    cin>>word;
    get_fail();
    cout<<query(word)<<endl;
}

 

多次搜索:力扣https://leetcode-cn.com/problems/multi-search-lcci/

给定一个较长字符串big和一个包含较短字符串的数组smalls,设计一个方法,根据smalls中的每一个较短字符串,对big进行搜索。输出smalls中的字符串在big里出现的所有位置positions,其中positions[i]为smalls[i]出现的所有位置。

示例:

输入:
big = "mississippi"
smalls = ["is","ppi","hi","sis","i","ssippi"]
输出: [[1,4],[8],[],[3],[1,4,7,10],[5]]

 

// Multi-pattern search with an Aho-Corasick automaton.
//
// multiSearch(big, smalls) returns, for every smalls[i], the ascending list
// of start positions at which smalls[i] occurs in big.
//
// Preconditions taken over from the original code: every character is in
// 'a'..'z', and the strings in smalls are distinct (if a string is repeated,
// only its last occurrence in smalls receives positions). An empty pattern
// yields an empty position list.
//
// Nodes live in a std::vector that is rebuilt on every multiSearch call, so
// the object is small, can live on the stack, and can be reused.
class Solution {
public:
    // One automaton node.
    struct Trie{
        int son[26];  // son[c]: node reached on letter 'a' + c, 0 if none
        int flag;     // 1-based index into smalls of the pattern ending here, 0 if none
        int fail;     // fail node, -1 until get_fail() assigns it
        Trie() : son{}, flag(0), fail(-1) {}
    };

    // Node pool: index 0 is a virtual node above the root, index 1 is the root.
    std::vector<Trie> trie;
    // Index of the most recently allocated node.
    int cnt;

    Solution() : trie(2), cnt(1) {}

    // Inserts pattern `s`; `a` is its index in smalls.
    void insert(const std::string& s, int a){
        int p = 1;
        for(char ch : s){
            const int word = ch - 'a';
            if(!trie[p].son[word]){
                // emplace_back may reallocate, so nodes are addressed by
                // index only and never by reference across this call.
                trie.emplace_back();
                cnt = static_cast<int>(trie.size()) - 1;
                trie[p].son[word] = cnt;
            }
            p = trie[p].son[word];
        }
        trie[p].flag = a + 1;  // stored 1-based so that 0 can mean "no pattern"
    }

    // Builds the fail links breadth-first and redirects every missing
    // transition to the corresponding transition of the fail node.
    void get_fail(){
        for(int i = 0; i < 26; i++)
            trie[0].son[i] = 1;  // all edges of the virtual node lead to the root
        trie[1].fail = 0;

        std::queue<int> q;
        q.push(1);
        while(!q.empty()){
            const int fa = q.front();
            q.pop();
            const int faFail = trie[fa].fail;
            for(int i = 0; i < 26; i++){
                const int now = trie[fa].son[i];
                if(!now){
                    trie[fa].son[i] = trie[faFail].son[i];
                    continue;
                }
                trie[now].fail = trie[faFail].son[i];
                q.push(now);
            }
        }
    }

    // Scans `big` once. At every position the fail chain of the current node
    // is walked and each pattern ending there gets its start index recorded.
    std::vector<std::vector<int>> query(const std::string& big,
                                        const std::vector<std::string>& smalls){
        std::vector<std::vector<int>> ans(smalls.size());
        int p = 1;
        const int len = static_cast<int>(big.length());
        for(int i = 0; i < len; i++){
            p = trie[p].son[big[i] - 'a'];
            for(int k = p; k > 1; k = trie[k].fail){
                const int id = trie[k].flag - 1;
                if(id >= 0){
                    // The match ends at i; do the subtraction in signed
                    // arithmetic instead of mixing int with size_t.
                    const int start = i - static_cast<int>(smalls[id].size()) + 1;
                    ans[id].push_back(start);
                }
            }
        }
        return ans;
    }

    std::vector<std::vector<int>> multiSearch(std::string big, std::vector<std::string>& smalls) {
        // Start from an empty automaton so that repeated calls on the same
        // object do not see nodes or flags left over from an earlier call.
        std::size_t capacity = 2;
        for(const std::string& s : smalls)
            capacity += s.size();
        trie.assign(2, Trie());
        trie.reserve(capacity);  // at most one node per pattern character
        cnt = 1;

        const int n = static_cast<int>(smalls.size());
        for(int i = 0; i < n; i++){
            insert(smalls[i], i);
        }
        get_fail();
        return query(big, smalls);
    }
};

 

 另一种版本(更适合我,字典树用指针构造),这种更快

// Multi-pattern search with a pointer-based trie of the patterns.
//
// multiSearch(big, smalls) returns, for every smalls[i], the ascending list
// of start positions at which smalls[i] occurs in big. For each start
// position of big the trie is walked as far as the text allows.
//
// Preconditions taken over from the original code: every character is in
// 'a'..'z', and the strings in smalls are distinct (if a string is repeated,
// only its last occurrence in smalls receives positions). Empty patterns are
// skipped and yield an empty position list.
class Solution {
public:
    // Trie node. A node owns its children and frees them when destroyed.
    struct Trie{
        int sid;          // index in smalls of the pattern ending here, -1 if none
        Trie *child[26];  // child[c]: node for letter 'a' + c, nullptr if absent
        Trie() : sid(-1), child{} {}
        ~Trie(){
            for(Trie* c : child)
                delete c;
        }
        // Copying would lead to the children being freed twice.
        Trie(const Trie&) = delete;
        Trie& operator=(const Trie&) = delete;
    };

    Trie* root = new Trie();

    Solution() = default;
    ~Solution(){ delete root; }
    // The object owns `root`, so it must not be copied.
    Solution(const Solution&) = delete;
    Solution& operator=(const Solution&) = delete;

    // Inserts `word` and records `s` (its index in smalls) at its last node.
    void insert(std::string word,int s){
        Trie* node = root;
        for(char ch : word){
            const int cid = ch - 'a';
            if(node->child[cid] == nullptr)
                node->child[cid] = new Trie();
            node = node->child[cid];
        }
        node->sid = s;
    }

    int n,m;  // number of patterns and length of the text of the latest multiSearch call

    // Appends `bid` to ans[sid] for every stored pattern that is a prefix of `word`.
    void search(std::string word,std::vector<std::vector<int>>&ans,int bid){
        match_from(word, 0, ans, bid);
    }

    std::vector<std::vector<int>> multiSearch(std::string big, std::vector<std::string>& smalls) {
        // Rebuild the trie from scratch: patterns left over from an earlier
        // call would carry sid values that do not belong to this `ans`.
        Trie* fresh = new Trie();
        delete root;
        root = fresh;

        n = static_cast<int>(smalls.size());
        m = static_cast<int>(big.length());
        std::vector<std::vector<int>> ans(n);
        for(int i = 0; i < n; i++){
            if(smalls[i].empty())   continue;
            insert(smalls[i],i);
        }
        // Match from every start position directly inside `big`, without
        // copying the remaining text for each position.
        for(int i = 0; i < m; i++){
            match_from(big, static_cast<std::size_t>(i), ans, i);
        }
        return ans;
    }

private:
    // Walks the trie along text[start..]. Every node on the path that ends a
    // pattern gets `bid` appended to that pattern's list; the walk stops at
    // the first character that has no child.
    void match_from(const std::string& text, std::size_t start,
                    std::vector<std::vector<int>>& ans, int bid) const {
        const Trie* node = root;
        for(std::size_t i = start; i < text.size(); ++i){
            if(node->sid != -1)   ans[node->sid].push_back(bid);
            node = node->child[text[i] - 'a'];
            if(node == nullptr)   return;
        }
        if(node->sid != -1)   ans[node->sid].push_back(bid);
    }
};

 

推荐相关视频:

b站:星垂月朦胧

https://www.bilibili.com/video/BV1Nk4y1B7SL?p=1

posted @ 2020-10-14 19:43  我不秃  阅读(130)  评论(0)    收藏  举报