D. Match & Catch 后缀自动机 || 广义后缀自动机

http://codeforces.com/contest/427/problem/D

题目是找出两个串的最短公共子串，并且在两个串中出现的次数只能是1次。

正解好像是 dp 之类的做法，但是用后缀自动机（SAM）可以方便很多，复杂度 O(n^2)。

首先对两个串建立sam，拓扑dp出endpos集合的大小，然后枚举第二个串的所有子串，在两个sam中跑就行了。

很无脑。从[i, j] 递推到[i, j + 1]这个子串，是可以O(1)转移的。

#include <bits/stdc++.h>
#define IOS ios::sync_with_stdio(false)
using namespace std;
#define inf (0x3f3f3f3f)
typedef long long int LL;
// Alphabet size: lowercase letters only.
const int N = 26;
// Capacity unit for the arrays of this program (two strings of up to 5000 characters plus slack).
const int maxn = 2 * 5000 + 20;

// One state of a suffix automaton, stored in a statically allocated pool.
struct Node {
    int mxCnt;   // length of the longest substring recognised by this state
    int miCnt;   // length of the shortest substring recognised by this state
    int id;      // index of this node inside its pool, so a pointer can be mapped back to an index
    int pos;     // position in the original string recorded when the node was created
    bool flag;   // true when this state recognises a prefix of the string
    Node *pNext[N];  // outgoing transition for each letter, NULL when absent
    Node *fa;        // parent (suffix-link) pointer, NULL for the root
};

// Node pools are twice maxn because cloned states are created in addition to one state per character.
Node suffixAutomaton[maxn * 2];  // pool of the automaton currently being built
Node sam[maxn * 2];              // snapshot of the first string's automaton
Node *root;                      // initial state of the automaton being built
Node *last;                      // state reached by the whole text inserted so far
int t;                           // number of pool slots already handed out
struct Node *create(int mxCnt = -1, struct Node *node = NULL) { //新的节点
    if (mxCnt != -1) {
        suffixAutomaton[t].mxCnt = mxCnt, suffixAutomaton[t].fa = NULL;
        for (int i = 0; i < N; ++i) suffixAutomaton[t].pNext[i] = NULL;
    } else {
        suffixAutomaton[t] = *node; //保留了node节点所有的指向信息。★全部等于node
        //可能需要注意下pos，在原串中的位置。现在pos等于原来node的pos
    }
    suffixAutomaton[t].id = t;  //必须要有的，不然id错误
    suffixAutomaton[t].flag = false; //默认不是前缀节点
    return &suffixAutomaton[t++];
}
// Extends the automaton by one character.
//   x   : letter index used to address pNext
//   pos : position of the character in the original string, stored on the new state
// Creates the state np for the extended text, links it into the automaton, sets its
// parent pointer and miCnt, and makes it the new `last`.
void addChar(int x, int pos) { // pos is the position in the original string
    struct Node *p = last, *np = create(p->mxCnt + 1, NULL);
    np->flag = true; // np recognises a prefix of the string
    np->pos = pos, last = np; // last is the final state, the one that accepts the next appended character
    // Walk up the parent chain and add the missing x-transitions towards np.
    for (; p != NULL && p->pNext[x] == NULL; p = p->fa) p->pNext[x] = np;
    if (p == NULL) {
        np->fa = root;
        np->miCnt = 1; // hangs directly below the root, so the shortest string has length 1
        return;
    }
    struct Node *q = p->pNext[x];
    if (q->mxCnt == p->mxCnt + 1) { // q is exactly one character longer than p: no split needed
        np->fa = q;
        np->miCnt = q->mxCnt + 1; // author's example: q holds "ab", np's shortest string "bab" has length 2 + 1
        return;
    }
    // p : the state reached while climbing that already has an x-transition
    // np: the new state created for the inserted character x
    // q : q = p->pNext[x], the target of p's x-transition
    // nq: clone of q created because q->mxCnt != p->mxCnt + 1
    struct Node *nq = create(-1, q); // replacement for q that serves as parent of both q and np
    nq->mxCnt = p->mxCnt + 1; // the clone keeps only the strings up to length p->mxCnt + 1
    q->miCnt = nq->mxCnt + 1, np->miCnt = nq->mxCnt + 1;
    q->fa = nq, np->fa = nq; // nq already carries q's original transitions and parent
    // Redirect the x-transitions that pointed at q on the rest of the parent chain.
    for (; p && p->pNext[x] == q; p = p->fa) {
        p->pNext[x] = nq;
    }
}
// Resets the node pool and creates the root state (maximal length 0);
// the root is also the initial `last`.
void init() {
    t = 0;
    last = create(0, NULL);
    root = last;
}
// Builds a fresh automaton for the 1-indexed text str[1..lenstr] of lowercase letters.
void build(char str[], int lenstr) {
    init();
    int i = 1;
    while (i <= lenstr) {
        addChar(str[i] - 'a', i);
        ++i;
    }
}
// Input strings, read 1-indexed (scanf writes at str + 1 / sub + 1).
char str[maxn];
char sub[maxn];
// Work arrays of the queue-based pass in init(int, Node*, int[]).
int in[maxn];       // number of not-yet-processed children per node id
int que[maxn];      // FIFO of node ids
int dp[2][maxn];    // dp[0]: counts for str's automaton, dp[1]: counts for sub's automaton
// Prefix-hash tables read by ok(); nothing in the visible source fills them.
unsigned long long int sum[maxn];
unsigned long long int po[maxn];
// Returns true when the hash of the length-len window ending at index en equals the hash of
// exactly one length-len window ending in [len, lensub], computed from the tables sum and po.
// NOTE(review): sum/po are never filled and ok() has no caller in the visible source.
bool ok(int en, int len, int lensub) {
    const unsigned long long int scale = po[len];
    const unsigned long long int target = sum[en] - sum[en - len] * scale;
    int matches = 0;
    int end = len;
    while (end <= lensub) {
        const unsigned long long int window = sum[end] - sum[end - len] * scale;
        if (window == target) {
            ++matches;
        }
        ++end;
    }
    return matches == 1;
}
void init(int t, struct Node * suffixAutomaton, int dp[]) {
    memset(in, false, sizeof in);
    for (int i = 1; i < t; ++i) {
        if (suffixAutomaton[i].flag) dp[i] = 1;
        in[suffixAutomaton[i].fa->id]++;
    }
    int head = 0, tail = 0;
    for (int i = 1; i < t; ++i) {
        if (in[i] == 0) que[tail++] = i;
    }
    while (head < tail) {
        int cur = que[head++];
        if (cur == 0) break;
        dp[suffixAutomaton[cur].fa->id] += dp[cur];
        in[suffixAutomaton[cur].fa->id]--;
        if (in[suffixAutomaton[cur].fa->id] == 0)
            que[tail++] = suffixAutomaton[cur].fa->id;
    }
}
void work() {
    scanf("%s%s", str + 1, sub + 1);
    int lenstr = strlen(str + 1), lensub = strlen(sub + 1);
    build(str, lenstr);
    int sam_t = t;
    memcpy(sam, suffixAutomaton, sizeof suffixAutomaton);
    build(sub, lensub);
    init(sam_t, sam, dp[0]);
    init(t, suffixAutomaton, dp[1]);
//    printf("%d\n", dp[1][5]);
    int mi = inf;
    for (int i = 1; i <= lensub; ++i) {
        int strnow = 0, subnow = 0;
        for (int j = i; j <= lensub; ++j) {
            int id = sub[j] - 'a';
            if (sam[strnow].pNext[id] == NULL) break;
            strnow = sam[strnow].pNext[id]->id;
            subnow = suffixAutomaton[subnow].pNext[id]->id;
            if (dp[0][strnow] == 1 && dp[1][subnow] == 1) {
                mi = min(mi, j - i + 1);
            }
        }
    }
    if (mi == inf) mi = -1;
    printf("%d\n", mi);
}

// Program entry point: solves a single test case read from standard input.
int main() {
#ifdef local
    // Only when the macro `local` is defined: take the input from data.txt.
    freopen("data.txt", "r", stdin);
//    freopen("data.txt", "w", stdout);
#endif
    work();
    return 0;
}

View Code

这题有一个 O(n) 的算法：把两个串合并，中间用一个两个串里都不会出现的字符分隔，这是为了不产生跨越两个串的多余子串。

主要思想就是，要找到一个串，出现的次数为2，并且是在两个不同的串中分别出现的。

出现次数是2，那么就是拓扑dp的时候，endpos集合的大小是2即可。那么怎么限制在两个不同的串中出现过？

记在第一个串出现的时候，id是1 << 0，第二个串出现的时候，id是1 << 1

然后在 dp 出 endpos 集合大小的时候，顺便也维护一下在哪个串里出现过即可。

每一个状态，都可能包含了若干个子串，那么需要取最短的子串。

#include <bits/stdc++.h>
#define IOS ios::sync_with_stdio(false)
using namespace std;
#define inf (0x3f3f3f3f)
typedef long long int LL;
// Number of transition slots per state; work() uses letters 0..25 plus separator index 27.
const int N = 30;
// Capacity unit for the arrays of this program.
const int maxn = 2 * 10000 + 20;

// One state of the suffix automaton, stored in a statically allocated pool.
struct Node {
    int mxCnt;   // length of the longest substring recognised by this state
    int miCnt;   // length of the shortest substring recognised by this state
    int id;      // index of this node inside the pool, so a pointer can be mapped back to an index
    int pos;     // value recorded by addChar when the node was created
    bool flag;   // true when this state recognises a prefix of the inserted text
    Node *pNext[N];  // outgoing transition per symbol, NULL when absent
    Node *fa;        // parent (suffix-link) pointer, NULL for the root
};

// The pool is twice maxn because cloned states are created in addition to one state per character.
Node suffixAutomaton[maxn * 2];
Node *root;   // initial state
Node *last;   // state reached by the whole text inserted so far
int t;        // number of pool slots already handed out
struct Node *create(int mxCnt = -1, struct Node *node = NULL) { //新的节点
    if (mxCnt != -1) {
        suffixAutomaton[t].mxCnt = mxCnt, suffixAutomaton[t].fa = NULL;
        for (int i = 0; i < N; ++i) suffixAutomaton[t].pNext[i] = NULL;
    } else {
        suffixAutomaton[t] = *node; //保留了node节点所有的指向信息。★全部等于node
        //可能需要注意下pos，在原串中的位置。现在pos等于原来node的pos
    }
    suffixAutomaton[t].id = t;  //必须要有的，不然id错误
    suffixAutomaton[t].flag = false; //默认不是前缀节点
    return &suffixAutomaton[t++];
}
// Extends the automaton by one symbol.
//   x   : symbol index used to address pNext
//   pos : bit index of the source the symbol belongs to; the new state stores 1 << pos.
//         work() passes 0 for the first string, 1 for the second and 2 for the separator.
// Creates the state np for the extended text, links it into the automaton, sets its
// parent pointer and miCnt, and makes it the new `last`.
void addChar(int x, int pos) { // pos selects the bit stored on the new state (see above)
    struct Node *p = last, *np = create(p->mxCnt + 1, NULL);
    np->flag = true; // np recognises a prefix of the inserted text
    np->pos = 1 << pos, last = np; // last is the final state, the one that accepts the next appended symbol
    // Walk up the parent chain and add the missing x-transitions towards np.
    for (; p != NULL && p->pNext[x] == NULL; p = p->fa) p->pNext[x] = np;
    if (p == NULL) {
        np->fa = root;
        np->miCnt = 1; // hangs directly below the root, so the shortest string has length 1
        return;
    }
    struct Node *q = p->pNext[x];
    if (q->mxCnt == p->mxCnt + 1) { // q is exactly one symbol longer than p: no split needed
        np->fa = q;
        np->miCnt = q->mxCnt + 1; // author's example: q holds "ab", np's shortest string "bab" has length 2 + 1
        return;
    }
    // p : the state reached while climbing that already has an x-transition
    // np: the new state created for the inserted symbol x
    // q : q = p->pNext[x], the target of p's x-transition
    // nq: clone of q created because q->mxCnt != p->mxCnt + 1
    struct Node *nq = create(-1, q); // replacement for q that serves as parent of both q and np
    nq->mxCnt = p->mxCnt + 1; // the clone keeps only the strings up to length p->mxCnt + 1
    q->miCnt = nq->mxCnt + 1, np->miCnt = nq->mxCnt + 1;
    q->fa = nq, np->fa = nq; // nq already carries q's original transitions, parent and pos
    // Redirect the x-transitions that pointed at q on the rest of the parent chain.
    for (; p && p->pNext[x] == q; p = p->fa) {
        p->pNext[x] = nq;
    }
}
// Starts an empty automaton: rewinds the pool and allocates the root (maximal length 0),
// which is also the initial `last`.
void init() {
    t = 0;
    struct Node *start = create(0, NULL);
    root = start;
    last = start;
}
// Builds a fresh automaton for the 1-indexed text str[1..lenstr] of lowercase letters.
// NOTE(review): the index i is forwarded as addChar's second argument; confirm that is
// intended before using this helper. It has no caller in the visible source.
void build(char str[], int lenstr) {
    init();
    for (int i = 0; i < lenstr; ++i) {
        const int at = i + 1;
        addChar(str[at] - 'a', at);
    }
}
// Input strings, read 1-indexed (scanf writes at str + 1 / sub + 1).
char str[maxn];
char sub[maxn];
// Work arrays of the queue-based pass in work(), all indexed by node id.
int que[maxn * 2];  // FIFO of node ids
int in[maxn];       // number of not-yet-processed children
int dp[maxn];       // occurrence count accumulated from the prefix states
int is[maxn];       // OR of the source bits (node.pos) accumulated the same way
void work() {
    scanf("%s%s", str + 1, sub + 1);
    init();
    for (int i = 1; str[i]; ++i) addChar(str[i] - 'a', 0);
    addChar(27, 2);
    for (int i = 1; sub[i]; ++i) addChar(sub[i] - 'a', 1);
    for (int i = 1; i < t; ++i) {
        is[i] = suffixAutomaton[i].pos;
        if (suffixAutomaton[i].flag) dp[i] = 1;
        in[suffixAutomaton[i].fa->id]++;
    }
    int head = 0, tail = 0;
    for (int i = 1; i < t; ++i) {
        if (in[i] == 0) que[tail++] = i;
    }
    while (head < tail) {
        int cur = que[head++];
        if (!cur) break;
        is[suffixAutomaton[cur].fa->id] |= is[cur];
        dp[suffixAutomaton[cur].fa->id] += dp[cur];
        in[suffixAutomaton[cur].fa->id]--;
        if (in[suffixAutomaton[cur].fa->id] == 0) que[tail++] = suffixAutomaton[cur].fa->id;
    }
    int mi = inf;
    for (int i = 1; i < t; ++i) {
        if (is[i] == 3 && dp[i] == 2) {
            mi = min(mi, suffixAutomaton[i].miCnt); //最短
        }
    }
    if (mi == inf) mi = -1;
    printf("%d\n", mi);
}

// Program entry point: solves a single test case read from standard input.
int main() {
#ifdef local
    // Only when the macro `local` is defined: take the input from data.txt.
    freopen("data.txt", "r", stdin);
//    freopen("data.txt", "w", stdout);
#endif
    work();
    return 0;
}

View Code

也可以直接用广义后缀自动机。

广义后缀自动机能识别多个主串的所有子串，并且在拓扑 dp 的时候能识别出子串是在哪个串中出现的。

广义后缀自动机就是把多个主串统一弄起来，每次都从root开始插入

这就带来一个问题就是已经存在了该节点。

那么就不需要np了。如果该节点能够代替新插入的节点接受后缀，也就是p->mxCnt + 1 == q->mxCnt，中间不含有任何字符。那么last直接去到q就好了，否则就要新建节点nq来弄个节点代替q接受后缀。和后缀自动机一个意思。

ps: 这个节点就是当前id的前缀节点。是属于id的。

#include <bits/stdc++.h>
#define IOS ios::sync_with_stdio(false)
using namespace std;
#define inf (0x3f3f3f3f)
typedef long long int LL;
const int MOD = 1000000000 + 7;
// Alphabet size: lowercase letters only.
const int N = 26;
// Capacity unit for the arrays of this program.
const int maxn = 100000 + 20;

// One state of the generalized suffix automaton, stored in a statically allocated pool.
struct Node {
    int mxCnt;   // length of the longest substring recognised by this state
    int miCnt;   // length of the shortest substring recognised by this state
    int id;      // index of this node inside the pool, so a pointer can be mapped back to an index
    int pos;     // position in the source string recorded when the node was created
    bool flag;   // true when this state recognises a prefix of some inserted string
    bool R[3];   // R[k] is set when the state was marked as a prefix state of string k
    Node *pNext[N];  // outgoing transition for each letter, NULL when absent
    Node *fa;        // parent (suffix-link) pointer, NULL for the root
};

// The pool is twice maxn because cloned states are created in addition to one state per character.
Node suffixAutomaton[maxn * 2];
Node *root;   // initial state
Node *last;   // state reached by the text of the current string inserted so far
int t;        // number of pool slots already handed out
struct Node *create(int mxCnt = -1, struct Node *node = NULL) { //新的节点
    if (mxCnt != -1) {
        suffixAutomaton[t].mxCnt = mxCnt, suffixAutomaton[t].fa = NULL;
        for (int i = 0; i < N; ++i) suffixAutomaton[t].pNext[i] = NULL;
    } else {
        suffixAutomaton[t] = *node; //保留了node节点所有的指向信息。★全部等于node
        //可能需要注意下pos，在原串中的位置。现在pos等于原来node的pos
    }
    suffixAutomaton[t].id = t;  //必须要有的，不然id错误
    suffixAutomaton[t].flag = false; //默认不是前缀节点
    return &suffixAutomaton[t++];
}
// Extends the generalized automaton by one character of string number `id`.
//   x   : letter index used to address pNext
//   pos : position of the character in its string, stored on a newly created state np
//   id  : index of the string being inserted (used to set R[id])
// Because every string is inserted starting from the root, the transition last --x--> may
// already exist; that case is handled first and never creates np. Otherwise the usual
// single-string extension is performed. In every case `last` ends up on the state that
// recognises the current prefix, marked with flag = true and R[id] = true.
void addChar(int x, int pos, int id) { // pos is the position in the source string
    struct Node *p = last;
    if (p->pNext[x] != NULL) { // the transition already exists, so no np is needed
        struct Node *q = p->pNext[x];
        if (p->mxCnt + 1 == q->mxCnt) {
            last = q; // q itself becomes the state that accepts the next character
            q->R[id] = true;
q->flag = true;
            return;
        }
        // q cannot serve as the accepting state as it is,
        // so a clone is created to stand in for it; the clone is a prefix state of string id
        struct Node * nq = create(-1, q);
        for (int i = 0; i < 3; ++i) nq->R[i] = false;
        nq->mxCnt = p->mxCnt + 1;
        nq->R[id] = true;
        nq->flag = true; // this state belongs to string id: it is one of its prefix states
        q->fa = nq; // there is no np in this branch
        q->miCnt = nq->mxCnt + 1;
        // Redirect the x-transitions that pointed at q on the parent chain.
        for (; p && p->pNext[x] == q; p = p->fa) p->pNext[x] = nq;
        last = nq; // the clone becomes the state that accepts the next character
        return;
    }
    struct Node *np = create(p->mxCnt + 1, NULL);
    for (int i = 0; i < 3; ++i) np->R[i] = false; // must be cleared for every new state
    np->R[id] = true;
    np->flag = true; // prefix state
    np->pos = pos, last = np; // last is the final state, the one that accepts the next appended character
    // Walk up the parent chain and add the missing x-transitions towards np.
    for (; p != NULL && p->pNext[x] == NULL; p = p->fa) p->pNext[x] = np;
    if (p == NULL) {
        np->fa = root;
        np->miCnt = 1; // hangs directly below the root, so the shortest string has length 1
        return;
    }
    struct Node *q = p->pNext[x];
    if (q->mxCnt == p->mxCnt + 1) { // q is exactly one character longer than p, so it can be np's parent
        np->fa = q;
        np->miCnt = q->mxCnt + 1; // author's example: q holds "ab", np's shortest string "bab" has length 2 + 1
        return;
    }
    struct Node *nq = create(-1, q); // replacement for q that serves as parent of both q and np
    for (int i = 0; i < 3; ++i) nq->R[i] = false;
    nq->mxCnt = p->mxCnt + 1; // the clone keeps only the strings up to length p->mxCnt + 1
    q->miCnt = nq->mxCnt + 1, np->miCnt = nq->mxCnt + 1;
    q->fa = nq, np->fa = nq; // nq already carries q's original transitions and parent
    // Redirect the x-transitions that pointed at q on the rest of the parent chain.
    for (; p && p->pNext[x] == q; p = p->fa) {
        p->pNext[x] = nq;
    }
}
// Starts an empty automaton: rewinds the pool and allocates the root (maximal length 0),
// which is also the initial `last`.
void init() {
    t = 0;
    root = create(0, NULL);
    last = root;
}
// Input buffer, reused for both strings and read 1-indexed (scanf writes at str + 1).
char str[maxn];
// Work arrays of the queue-based pass in work(), all indexed by node id. Node ids range
// over the whole node pool (maxn * 2 entries), so every one of them uses that size.
int dp[maxn * 2][2];  // dp[v][k]: count accumulated for string k
int d[maxn * 2];      // sum over both strings, cross-checked with an assert in work()
queue<int> que;       // FIFO of node ids
int in[maxn * 2];     // not-yet-processed children; was maxn, smaller than the id range
void work() {
    init();
    scanf("%s", str + 1);
    for (int i = 1; str[i]; ++i) addChar(str[i] - 'a', i, 0);
    last = root;
    scanf("%s", str + 1);
    for (int i = 1; str[i]; ++i) addChar(str[i] - 'a', i, 1);
    for (int i = 1; i < t; ++i) {
        in[suffixAutomaton[i].fa->id]++;
        if (suffixAutomaton[i].flag) {
            dp[i][0] = suffixAutomaton[i].R[0];
            dp[i][1] = suffixAutomaton[i].R[1];
            d[i] = suffixAutomaton[i].R[0] + suffixAutomaton[i].R[1];
        }
    }
    for (int i = 1; i < t; ++i) {
        if (in[i] == 0) que.push(i);
    }
    while (!que.empty()) {
        int cur = que.front();
        que.pop();
        if (!cur) break;
        dp[suffixAutomaton[cur].fa->id][0] += dp[cur][0];
        dp[suffixAutomaton[cur].fa->id][1] += dp[cur][1];
        d[suffixAutomaton[cur].fa->id] += d[cur];
        in[suffixAutomaton[cur].fa->id]--;
        if (in[suffixAutomaton[cur].fa->id] == 0) que.push(suffixAutomaton[cur].fa->id);
    }
    int ans = inf;
    for (int i = 1; i < t; ++i) {
        assert(d[i] == dp[i][0] + dp[i][1]);
        if (dp[i][0] == 1 && dp[i][1] == 1) {
            ans = min(ans, suffixAutomaton[i].miCnt);
        }
    }
    if (ans == inf) ans = -1;
    printf("%d\n", ans);
}

// Program entry point: solves a single test case read from standard input.
int main() {
#ifdef local
    // Only when the macro `local` is defined: take the input from data.txt.
    freopen("data.txt", "r", stdin);
//    freopen("data.txt", "w", stdout);
#endif
    work();
    return 0;
}

View Code

posted on 2017-09-06 10:48 stupid_one 阅读(293) 评论(0) 收藏举报

刷新页面返回顶部

D. Match & Catch 后缀自动机 || 广义后缀自动机

导航

公告