• 博客园logo
  • 会员
  • 众包
  • 新闻
  • 博问
  • 闪存
  • 赞助商
  • HarmonyOS
  • Chat2DB
    • 搜索
      所有博客
    • 搜索
      当前博客
  • 写随笔 我的博客 短消息 简洁模式
    用户头像
    我的博客 我的园子 账号设置 会员中心 简洁模式 ... 退出登录
    注册 登录
james1207

博客园    首页    新随笔    联系   管理    订阅  订阅

后缀数组练习题若干

POJ 1743    不可重叠最长重复子串

二分答案,即子串的长度,假设为k。

利用height数组,将排序后的后缀分为若干组。

每组内的height值都不小于k。

然后只需查看组内是否有满足要求的两个不会产生重叠的子串即可。

 

#include <iostream>
#include <cstdio>
#include <cstring>
#include <vector>
#include <set>
#include <queue>
#include <algorithm>
#define MAXN 22222
#define MAXM 111
#define INF 1000000000
using namespace std;
int r[MAXN];
int wa[MAXN], wb[MAXN], wv[MAXN], tmp[MAXN];
int sa[MAXN]; //index range 1~n value range 0~n-1
// Rank-pair equality test used by da: returns 1 when r[a] == r[b] and
// r[a + l] == r[b + l], and 0 otherwise.
int cmp(int *r, int a, int b, int l)
{
    if (r[a] != r[b]) return 0;
    return r[a + l] == r[b + l] ? 1 : 0;
}
// Builds the suffix array of r[0..n-1] by prefix doubling with counting sort.
//   r  : input symbols. Each value is used as an index into ws (the tmp
//        array), so it must lie in [0, m) and m must fit inside tmp.
//   sa : output; sa[i] is the start index of the i-th smallest suffix.
//   n  : number of symbols to sort. main passes length + 1 so that the
//        trailing 0 written at r[length] is included.
//   m  : bucket count for the first pass; after every round it is replaced
//        by the number of distinct ranks produced.
// Works in the file-level scratch arrays wa, wb, wv and tmp.
void da(int *r, int *sa, int n, int m)
{
    int i, j, p, *x = wa, *y = wb, *ws = tmp;
    // Counting sort of all suffixes by their first symbol; x holds the ranks.
    for (i = 0; i < m; i++) ws[i] = 0;
    for (i = 0; i < n; i++) ws[x[i] = r[i]]++;
    for (i = 1; i < m; i++) ws[i] += ws[i - 1];
    for (i = n - 1; i >= 0; i--) sa[--ws[x[i]]] = i;
    // j is the current half-length, p the number of distinct ranks; the loop
    // ends once all n suffixes have different ranks.
    for (j = 1, p = 1; p < n; j *= 2, m = p)
    {
        // y = suffix starts ordered by second half: the last j suffixes have
        // no second half and go first, the others follow in current sa order.
        for (p = 0, i = n - j; i < n; i++) y[p++] = i;
        for (i = 0; i < n; i++)
            if (sa[i] >= j) y[p++] = sa[i] - j;
        // Stable counting sort of y by the rank of the first half.
        for (i = 0; i < n; i++) wv[i] = x[y[i]];
        for (i = 0; i < m; i++) ws[i] = 0;
        for (i = 0; i < n; i++) ws[wv[i]]++;
        for (i = 1; i < m; i++) ws[i] += ws[i - 1];
        for (i = n - 1; i >= 0; i--) sa[--ws[wv[i]]] = y[i];
        // Swap the rank buffers and assign new ranks; neighbours in sa that
        // agree on both halves (cmp) receive the same rank.
        for (swap(x, y), p = 1, x[sa[0]] = 0, i = 1; i < n; i++)
            x[sa[i]] = cmp(y, sa[i - 1], sa[i], j) ? p - 1 : p++;
    }
}
int rank[MAXN];   // rank[i] = position of suffix i in sa (indices 0..n-1, values 1..n)
int height[MAXN]; // height[k] = common prefix length of suffixes sa[k-1] and sa[k], k from 1
// Fills rank[] and height[] for the text r[0..n-1] from its suffix array.
// sa[0] is expected to be the sentinel suffix, sa[1..n] the real suffixes.
void calheight(int *r, int *sa, int n)
{
    for (int pos = 1; pos <= n; ++pos)
        rank[sa[pos]] = pos;

    int matched = 0; // carried over between iterations; drops by at most one per step
    for (int start = 0; start < n; ++start)
    {
        if (matched > 0) --matched;
        const int prev = sa[rank[start] - 1];
        while (r[start + matched] == r[prev + matched]) ++matched;
        height[rank[start]] = matched;
    }
}
int n, a[MAXN];
bool check(int mid, int n)
{
    int flag = 0;
    int mx = -1, mi = n;
    for(int i = 2; i <= n + 1; i++)
    {
        if((i == n + 1 && flag) || (height[i] < mid && flag))
        {
            flag = 0;
            mx = max(mx, sa[i - 1]);
            mi = min(mi, sa[i - 1]);
            if(mx - mi >= mid) return true;
            mi = n, mx = -1;
        }
        else if(height[i] >= mid)
        {
            flag = 1;
            mx = max(mx, sa[i - 1]);
            mi = min(mi, sa[i - 1]);
        }
    }
    return false;
}
// Reads test cases until a count of 0 or end of input. For each one prints
// the longest length found by the binary search plus one, or 0 when no
// length of at least 4 differences passes check().
int main()
{
    for (;;)
    {
        if (scanf("%d", &n) == EOF) break;
        if (n == 0) break;

        for (int i = 0; i < n; ++i)
            scanf("%d", &a[i]);

        // Work on consecutive differences, offset by 89 before they are
        // handed to da (presumably to keep them positive - confirm against
        // the input range).
        for (int i = 1; i < n; ++i)
            r[i - 1] = a[i] - a[i - 1] + 89;
        n -= 1;
        r[n] = 0; // sentinel after the n differences

        da(r, sa, n + 1, 200);
        calheight(r, sa, n);

        int best = 0;
        int lo = 4;
        int hi = n / 2;
        while (lo <= hi)
        {
            const int mid = (lo + hi) >> 1;
            if (!check(mid, n))
            {
                hi = mid - 1;
                continue;
            }
            best = max(best, mid);
            lo = mid + 1;
        }

        if (best >= 4) printf("%d\n", best + 1);
        else printf("0\n");
    }
    return 0;
}


 

 

POJ 3261 可重叠的出现K次的最长重复子串

还是二分子串长度。 后缀分为若干组,然后判断是否有一个组的size不小于k

#include <iostream>
#include <cstdio>
#include <cstring>
#include <vector>
#include <set>
#include <queue>
#include <algorithm>
#define MAXN 22222
#define MAXM 111
#define INF 1000000000
using namespace std;
int r[MAXN];
int wa[MAXN], wb[MAXN], wv[MAXN], tmp[MAXN];
int sa[MAXN]; //index range 1~n value range 0~n-1
// Rank-pair equality test used by da: returns 1 when r[a] == r[b] and
// r[a + l] == r[b + l], and 0 otherwise.
int cmp(int *r, int a, int b, int l)
{
    if (r[a] != r[b]) return 0;
    return r[a + l] == r[b + l] ? 1 : 0;
}
// Builds the suffix array of r[0..n-1] by prefix doubling with counting sort.
//   r  : input symbols. Each value is used as an index into ws (the tmp
//        array), so it must lie in [0, m) and m must fit inside tmp.
//   sa : output; sa[i] is the start index of the i-th smallest suffix.
//   n  : number of symbols to sort. main passes length + 1 so that the
//        trailing 0 written at r[length] is included.
//   m  : bucket count for the first pass; after every round it is replaced
//        by the number of distinct ranks produced.
// Works in the file-level scratch arrays wa, wb, wv and tmp.
void da(int *r, int *sa, int n, int m)
{
    int i, j, p, *x = wa, *y = wb, *ws = tmp;
    // Counting sort of all suffixes by their first symbol; x holds the ranks.
    for (i = 0; i < m; i++) ws[i] = 0;
    for (i = 0; i < n; i++) ws[x[i] = r[i]]++;
    for (i = 1; i < m; i++) ws[i] += ws[i - 1];
    for (i = n - 1; i >= 0; i--) sa[--ws[x[i]]] = i;
    // j is the current half-length, p the number of distinct ranks; the loop
    // ends once all n suffixes have different ranks.
    for (j = 1, p = 1; p < n; j *= 2, m = p)
    {
        // y = suffix starts ordered by second half: the last j suffixes have
        // no second half and go first, the others follow in current sa order.
        for (p = 0, i = n - j; i < n; i++) y[p++] = i;
        for (i = 0; i < n; i++)
            if (sa[i] >= j) y[p++] = sa[i] - j;
        // Stable counting sort of y by the rank of the first half.
        for (i = 0; i < n; i++) wv[i] = x[y[i]];
        for (i = 0; i < m; i++) ws[i] = 0;
        for (i = 0; i < n; i++) ws[wv[i]]++;
        for (i = 1; i < m; i++) ws[i] += ws[i - 1];
        for (i = n - 1; i >= 0; i--) sa[--ws[wv[i]]] = y[i];
        // Swap the rank buffers and assign new ranks; neighbours in sa that
        // agree on both halves (cmp) receive the same rank.
        for (swap(x, y), p = 1, x[sa[0]] = 0, i = 1; i < n; i++)
            x[sa[i]] = cmp(y, sa[i - 1], sa[i], j) ? p - 1 : p++;
    }
}
int rank[MAXN];   // rank[i] = position of suffix i in sa (indices 0..n-1, values 1..n)
int height[MAXN]; // height[k] = common prefix length of suffixes sa[k-1] and sa[k], k from 1
// Fills rank[] and height[] for the text r[0..n-1] from its suffix array.
// sa[0] is expected to be the sentinel suffix, sa[1..n] the real suffixes.
void calheight(int *r, int *sa, int n)
{
    for (int pos = 1; pos <= n; ++pos)
        rank[sa[pos]] = pos;

    int matched = 0; // carried over between iterations; drops by at most one per step
    for (int start = 0; start < n; ++start)
    {
        if (matched > 0) --matched;
        const int prev = sa[rank[start] - 1];
        while (r[start + matched] == r[prev + matched]) ++matched;
        height[rank[start]] = matched;
    }
}
int n, k;
// Returns true when height[2..n] contains a stretch of adjacent suffixes
// whose count reaches k, where every height inside the stretch is >= mid.
bool check(int mid)
{
    int run = 1; // suffixes in the current stretch, counting the one that opens it
    for (int i = 2; i <= n; ++i)
    {
        run = (height[i] >= mid) ? run + 1 : 1;
        if (run >= k) return true;
    }
    return false;
}
int main()
{
    int m = 0;
    scanf("%d%d", &n, &k);
    for(int i = 0; i < n; i++)
    {
        scanf("%d", &r[i]);
        r[i]++;
        m = max(r[i], m);
    }
    r[n] = 0;
    da(r, sa, n + 1, m + 1);
    calheight(r, sa, n);
    int low = 1, high = n;
    int ans = 0;
    while(low <= high)
    {
        int mid = (low + high) >> 1;
        if(check(mid))
        {
            ans = max(ans, mid);
            low = mid + 1;
        }
        else high = mid - 1;
    }
    printf("%d\n", ans);
    return 0;
}

 

 




SPOJ SUBST1 求一个串中不同子串的个数

每个子串都是某个后缀的前缀

对于排名为k的后缀,它将产生n - sa[k]个前缀

但是有height[k]个前缀是跟前一个字符串的前缀相同。

故每个后缀的贡献是n - sa[k] - height[k]

求和即可

 

#include <iostream>
#include <cstdio>
#include <cstring>
#include <vector>
#include <set>
#include <queue>
#include <algorithm>
#define MAXN 55555
#define MAXM 111
#define INF 1000000000
using namespace std;
int r[MAXN];
int wa[MAXN], wb[MAXN], wv[MAXN], tmp[MAXN];
int sa[MAXN]; //index range 1~n value range 0~n-1
// Rank-pair equality test used by da: returns 1 when r[a] == r[b] and
// r[a + l] == r[b + l], and 0 otherwise.
int cmp(int *r, int a, int b, int l)
{
    if (r[a] != r[b]) return 0;
    return r[a + l] == r[b + l] ? 1 : 0;
}
// Builds the suffix array of r[0..n-1] by prefix doubling with counting sort.
//   r  : input symbols. Each value is used as an index into ws (the tmp
//        array), so it must lie in [0, m) and m must fit inside tmp.
//   sa : output; sa[i] is the start index of the i-th smallest suffix.
//   n  : number of symbols to sort. main passes length + 1 so that the
//        trailing 0 written at r[length] is included.
//   m  : bucket count for the first pass; after every round it is replaced
//        by the number of distinct ranks produced.
// Works in the file-level scratch arrays wa, wb, wv and tmp.
void da(int *r, int *sa, int n, int m)
{
    int i, j, p, *x = wa, *y = wb, *ws = tmp;
    // Counting sort of all suffixes by their first symbol; x holds the ranks.
    for (i = 0; i < m; i++) ws[i] = 0;
    for (i = 0; i < n; i++) ws[x[i] = r[i]]++;
    for (i = 1; i < m; i++) ws[i] += ws[i - 1];
    for (i = n - 1; i >= 0; i--) sa[--ws[x[i]]] = i;
    // j is the current half-length, p the number of distinct ranks; the loop
    // ends once all n suffixes have different ranks.
    for (j = 1, p = 1; p < n; j *= 2, m = p)
    {
        // y = suffix starts ordered by second half: the last j suffixes have
        // no second half and go first, the others follow in current sa order.
        for (p = 0, i = n - j; i < n; i++) y[p++] = i;
        for (i = 0; i < n; i++)
            if (sa[i] >= j) y[p++] = sa[i] - j;
        // Stable counting sort of y by the rank of the first half.
        for (i = 0; i < n; i++) wv[i] = x[y[i]];
        for (i = 0; i < m; i++) ws[i] = 0;
        for (i = 0; i < n; i++) ws[wv[i]]++;
        for (i = 1; i < m; i++) ws[i] += ws[i - 1];
        for (i = n - 1; i >= 0; i--) sa[--ws[wv[i]]] = y[i];
        // Swap the rank buffers and assign new ranks; neighbours in sa that
        // agree on both halves (cmp) receive the same rank.
        for (swap(x, y), p = 1, x[sa[0]] = 0, i = 1; i < n; i++)
            x[sa[i]] = cmp(y, sa[i - 1], sa[i], j) ? p - 1 : p++;
    }
}
int rank[MAXN];   // rank[i] = position of suffix i in sa (indices 0..n-1, values 1..n)
int height[MAXN]; // height[k] = common prefix length of suffixes sa[k-1] and sa[k], k from 1
// Fills rank[] and height[] for the text r[0..n-1] from its suffix array.
// sa[0] is expected to be the sentinel suffix, sa[1..n] the real suffixes.
void calheight(int *r, int *sa, int n)
{
    for (int pos = 1; pos <= n; ++pos)
        rank[sa[pos]] = pos;

    int matched = 0; // carried over between iterations; drops by at most one per step
    for (int start = 0; start < n; ++start)
    {
        if (matched > 0) --matched;
        const int prev = sa[rank[start] - 1];
        while (r[start + matched] == r[prev + matched]) ++matched;
        height[rank[start]] = matched;
    }
}
char s[MAXN];
// Reads T strings and prints, for each, the sum over k = 1..n of
// n - sa[k] - height[k].
int main()
{
    int T;
    scanf("%d", &T);
    for (; T != 0; --T)
    {
        scanf("%s", s);
        const int n = strlen(s);

        // Copy the characters into r and track the largest symbol for da.
        int m = 0;
        for (int i = 0; i < n; ++i)
        {
            r[i] = (int)s[i];
            if (r[i] > m) m = r[i];
        }
        r[n] = 0; // sentinel

        da(r, sa, n + 1, m + 1);
        calheight(r, sa, n);

        // Suffix sa[k] has n - sa[k] prefixes, height[k] of which are shared
        // with the previous suffix in sorted order.
        long long total = 0;
        for (int k = 1; k <= n; ++k)
        {
            const int fresh = n - sa[k] - height[k];
            total += fresh;
        }
        printf("%lld\n", total);
    }
    return 0;
}


 

URAL 1297  求最长回文串

假设原串为S,将原串倒置后是T。

建立一个新串S+“~”+T

然后对新串做后缀数组。

然后我们枚举的是回文串的中心。

假设中心的位置为i。

有两种情况

回文为奇数

那么求lcp(i, n - i - 1)

回文为偶数那么求lcp(i, n - i)

然后更新最优解即可

用手画一画就知道是什么意思了。

 

#include <iostream>
#include <cstdio>
#include <cstring>
#include <vector>
#include <set>
#include <queue>
#include <algorithm>
#define MAXN 111111
#define MAXM 111
#define INF 1000000000
using namespace std;
int r[MAXN];
int wa[MAXN], wb[MAXN], wv[MAXN], tmp[MAXN];
int sa[MAXN]; //index range 1~n value range 0~n-1
// Rank-pair equality test used by da: returns 1 when r[a] == r[b] and
// r[a + l] == r[b + l], and 0 otherwise.
int cmp(int *r, int a, int b, int l)
{
    if (r[a] != r[b]) return 0;
    return r[a + l] == r[b + l] ? 1 : 0;
}
// Builds the suffix array of r[0..n-1] by prefix doubling with counting sort.
//   r  : input symbols. Each value is used as an index into ws (the tmp
//        array), so it must lie in [0, m) and m must fit inside tmp.
//   sa : output; sa[i] is the start index of the i-th smallest suffix.
//   n  : number of symbols to sort. main passes length + 1 so that the
//        trailing 0 written at r[length] is included.
//   m  : bucket count for the first pass; after every round it is replaced
//        by the number of distinct ranks produced.
// Works in the file-level scratch arrays wa, wb, wv and tmp.
void da(int *r, int *sa, int n, int m)
{
    int i, j, p, *x = wa, *y = wb, *ws = tmp;
    // Counting sort of all suffixes by their first symbol; x holds the ranks.
    for (i = 0; i < m; i++) ws[i] = 0;
    for (i = 0; i < n; i++) ws[x[i] = r[i]]++;
    for (i = 1; i < m; i++) ws[i] += ws[i - 1];
    for (i = n - 1; i >= 0; i--) sa[--ws[x[i]]] = i;
    // j is the current half-length, p the number of distinct ranks; the loop
    // ends once all n suffixes have different ranks.
    for (j = 1, p = 1; p < n; j *= 2, m = p)
    {
        // y = suffix starts ordered by second half: the last j suffixes have
        // no second half and go first, the others follow in current sa order.
        for (p = 0, i = n - j; i < n; i++) y[p++] = i;
        for (i = 0; i < n; i++)
            if (sa[i] >= j) y[p++] = sa[i] - j;
        // Stable counting sort of y by the rank of the first half.
        for (i = 0; i < n; i++) wv[i] = x[y[i]];
        for (i = 0; i < m; i++) ws[i] = 0;
        for (i = 0; i < n; i++) ws[wv[i]]++;
        for (i = 1; i < m; i++) ws[i] += ws[i - 1];
        for (i = n - 1; i >= 0; i--) sa[--ws[wv[i]]] = y[i];
        // Swap the rank buffers and assign new ranks; neighbours in sa that
        // agree on both halves (cmp) receive the same rank.
        for (swap(x, y), p = 1, x[sa[0]] = 0, i = 1; i < n; i++)
            x[sa[i]] = cmp(y, sa[i - 1], sa[i], j) ? p - 1 : p++;
    }
}
int rank[MAXN];   // rank[i] = position of suffix i in sa (indices 0..n-1, values 1..n)
int height[MAXN]; // height[k] = common prefix length of suffixes sa[k-1] and sa[k], k from 1
// Fills rank[] and height[] for the text r[0..n-1] from its suffix array.
// sa[0] is expected to be the sentinel suffix, sa[1..n] the real suffixes.
void calheight(int *r, int *sa, int n)
{
    for (int pos = 1; pos <= n; ++pos)
        rank[sa[pos]] = pos;

    int matched = 0; // carried over between iterations; drops by at most one per step
    for (int start = 0; start < n; ++start)
    {
        if (matched > 0) --matched;
        const int prev = sa[rank[start] - 1];
        while (r[start + matched] == r[prev + matched]) ++matched;
        height[rank[start]] = matched;
    }
}
int Log[MAXN];
int mi[MAXN][20];
void rmqinit(int n)
{
    for(int i = 1; i <= n; i++) mi[i][0] = height[i];
    int m = Log[n];
    for(int i = 1; i <= m; i++)
        for(int j = 1; j <= n; j++)
        {
            mi[j][i] = mi[j][i - 1];
            if(j + (1 << (i - 1)) <= n) mi[j][i] = min(mi[j][i], mi[j + (1 << (i - 1))][i - 1]);
        }
}
int lcp(int a, int b)
{
    a = rank[a];    b = rank[b];
    if(a > b) swap(a,b);
    a ++;
    int t = Log[b - a + 1];
    return min(mi[a][t] , mi[b - (1<<t) + 1][t]);
}
char s[MAXN * 2];
// For every input word, builds the suffix array of  word + separator +
// reversed word  and prints the first longest palindromic substring found by
// trying every centre.
int main()
{
    Log[1] = 0;
    for(int i = 2; i < MAXN; i++) Log[i] = Log[i >> 1] + 1;
    while(scanf("%s", s) != EOF)
    {
        int len = strlen(s);
        for(int i = 0; i < len; i++) r[i] = (int)s[i];
        r[len] = 128; // separator, larger than any symbol copied from s
        for(int i = 0; i < len; i++) r[len + 1 + i] = (int)s[len - 1 - i];
        int n = 2 * len + 1;
        r[n] = 0;     // sentinel
        da(r, sa, n + 1, 130);
        calheight(r, sa, n);
        rmqinit(n);
        int ans = 0;
        int pos = 0;  // start of the best palindrome; defined even if nothing is found
        for(int i = 0; i < len; i++)
        {
            // Odd length, centred on s[i]: position n - i - 1 holds s[i] in
            // the reversed copy.
            int half = lcp(i, n - i - 1);
            if(half * 2 - 1 > ans)
            {
                ans = half * 2 - 1;
                pos = i - half + 1;
            }
            // Even length, centred between s[i - 1] and s[i]. There is no
            // such centre for i == 0, and asking for it would query position
            // n, whose rank[] entry calheight never writes (it can hold a
            // value left over from an earlier, longer word).
            if(i > 0)
            {
                half = lcp(i, n - i);
                if(half * 2 > ans)
                {
                    ans = half * 2;
                    pos = i - half;
                }
            }
        }
        for(int i = 0; i < ans; i++) putchar(s[pos + i]);
        puts("");
    }
    return 0;
}


 

POJ 2406  

给定一个字符串S,已知该串是由某串重复K次 连接得到的。

求最大的k

这题的话。 貌似POJ上暴力跑的很快。

用后缀数组的做法是枚举子串的长度。

假设长度为len(len必须整除n),那么检查lcp(0, len)是否等于n - len即可

倍增在这里被卡掉了

用的DC3


 

#include <iostream>
#include <cstdio>
#include <cstring>
#include <vector>
#include <set>
#include <queue>
#include <cmath>
#include <algorithm>
#define MAXN 1111111
#define MAXM 111
#define INF 1000000000
#define F(x) ((x)/3+((x)%3==1?0:tb))
#define G(x) ((x)<tb?(x)*3+1:((x)-tb)*3+2)
using namespace std;
int wa[MAXN] , wb[MAXN] , wv[MAXN] , tmp[MAXN];
// Returns 1 when the three symbols starting at a equal the three symbols
// starting at b, and 0 otherwise.
int c0(int *r, int a, int b)
{
    for (int off = 0; off < 3; ++off)
        if (r[a + off] != r[b + off]) return 0;
    return 1;
}
// Merge-step comparison in dc3: returns 1 when the suffix at a sorts before
// the suffix at b, and 0 otherwise. k is b's position modulo 3.
// Symbols are compared first; once the following positions both have a rank
// in wv (one symbol ahead for k == 1, two for k == 2) those ranks decide.
int c12(int k, int *r, int a, int b)
{
    if (r[a] != r[b])
        return r[a] < r[b];
    if (k == 2)
        return c12(1, r, a + 1, b + 1);
    return wv[a + 1] < wv[b + 1];
}
// Stable counting sort: writes the n indices of a into b ordered by key
// r[a[i]]. Keys must lie in [0, m). Uses the file-level arrays wv and tmp.
void sort(int *r, int *a, int *b, int n, int m)
{
    for (int idx = 0; idx < n; ++idx) wv[idx] = r[a[idx]];

    // Histogram of the keys, then prefix sums giving each bucket's end.
    for (int key = 0; key < m; ++key) tmp[key] = 0;
    for (int idx = 0; idx < n; ++idx) ++tmp[wv[idx]];
    for (int key = 1; key < m; ++key) tmp[key] += tmp[key - 1];

    // Walk backwards so equal keys keep their relative order.
    for (int idx = n; idx-- > 0; )
    {
        const int slot = --tmp[wv[idx]];
        b[slot] = a[idx];
    }
}
// Recursive DC3 suffix-array construction for r[0..n-1].
//   r  : input symbols, each used as a counting-sort key in [0, m). r[n] and
//        r[n + 1] are overwritten with 0, and the reduced string for the
//        recursive call is stored starting at r + n.
//   sa : output suffix array; the recursive call writes its result at sa + n.
//   n  : number of symbols.
//   m  : key range for the counting sorts.
// NOTE(review): because r + n and sa + n are reused for the recursion, both
// buffers need room well beyond n entries (about 3 * n for this scheme) -
// confirm against the array sizes declared in this file.
void dc3(int *r, int *sa, int n, int m)
{
    int i, j, *rn = r + n;
    int *san = sa + n, ta = 0, tb = (n + 1) / 3, tbc = 0, p;
    // Zero padding so 3-symbol keys that run past the end are defined.
    r[n] = r[n + 1] = 0;
    // wa <- every position with i % 3 != 0 (tbc of them).
    for (i = 0; i < n; i++) if (i % 3 != 0) wa[tbc++] = i;
    // Three stable passes (third, second, first symbol) order those positions
    // by their 3-symbol key; the result ends up in wb.
    sort(r + 2, wa, wb, tbc, m);
    sort(r + 1, wb, wa, tbc, m);
    sort(r, wa, wb, tbc, m);
    // Name the keys in sorted order; equal keys (c0) share a name. Names are
    // written into the reduced string rn at index F(position).
    for (p = 1, rn[F(wb[0])] = 0, i = 1; i < tbc; i++)
        rn[F(wb[i])] = c0(r, wb[i-1], wb[i]) ? p-1 : p++;
    // Names not unique: sort the reduced string recursively. Otherwise the
    // names are already ranks and san is simply their inverse.
    if (p < tbc) dc3(rn, san, tbc, p);
    else for (i = 0; i < tbc; i++) san[rn[i]] = i;
    // wb <- positions with i % 3 == 0: san[i] < tb denotes position
    // san[i] * 3 + 1 (see G), so san[i] * 3 is the position just before it.
    for (i = 0; i < tbc; i++) if (san[i] < tb) wb[ta++] = san[i] * 3;
    if (n % 3 == 1) wb[ta++] = n-1;
    // One more stable pass by first symbol; the ordered list lands in wa.
    sort(r, wb, wa, ta, m);
    // Convert the sorted reduced indices back to text positions (G) and
    // record the rank of each such position in wv for c12.
    for (i = 0; i < tbc; i++) wv[wb[i] = G(san[i])] = i;
    // Merge the two sorted lists (wa and wb) into sa.
    for (i = 0, j = 0, p = 0; i < ta && j < tbc; p++)
        sa[p] = c12(wb[j] % 3, r, wa[i], wb[j]) ? wa[i++] : wb[j++];
    for (; i < ta; p++) sa[p] = wa[i++];
    for (; j < tbc; p++) sa[p] = wb[j++];
}
// Builds sa, rank and height for str[0..n-1] using dc3.
//   str    : input symbols in [0, m). str[n] is cleared here, and dc3 writes
//            further past the end of the input.
//   sa     : output; after the call sa[0..n-1] holds dc3's order with its
//            first entry dropped.
//   rank   : output; rank[sa[i]] = i.
//   height : output; height[k] is the common prefix length of the suffixes at
//            sa[k - 1] and sa[k], counted only inside str[0..n-1].
//   n, m   : symbol count and key range.
void da(int str[], int sa[], int rank[], int height[], int n, int m)
{
    // dc3 is run on n + 1 symbols, so str[n] is part of its input. dc3 also
    // stores its reduced strings past the end of whatever it is given, which
    // means this slot can still hold data from an earlier call. Clear it so
    // every call sees the same padding.
    str[n] = 0;
    dc3 (str , sa , n + 1 , m);
    int i, j, k;
    // Drop the first (smallest) entry and shift the rest down by one.
    for (i = 0; i < n; i++){
        sa[i] = sa[i + 1];
        rank[sa[i]] = i;
    }
    // height is filled in text order; k is carried between iterations and
    // reduced by one before each new comparison.
    for (i = 0, j = 0, k = 0; i < n; height[rank[i ++]] = k)
        if (rank[i] > 0)
            for (k ? k--: 0 , j = sa[rank[i]-1];
            i + k < n && j + k < n && str[i + k] == str[j + k];
            k++);
}
int lcp[MAXN];
// dc3 keeps each recursion level's reduced string and suffix array directly
// behind the current level's data (r + n and sa + n). The levels shrink by
// roughly a third each time, so r and sa need about three times the input
// length; MAXN entries alone are overrun for inputs near the size limit.
int r[MAXN * 3];
int  sa[MAXN * 3], rank[MAXN] , height[MAXN];
int n;
// Fills lcp[] so that lcp[k] is the common prefix length between the whole
// string (suffix 0) and the suffix of rank k, for k in 1..n. Running minima
// of height are taken outward from rank[0] in both directions.
void getlcp()
{
    const int pivot = rank[0];
    lcp[pivot] = n;

    // Towards smaller ranks.
    for (int i = pivot - 1; i >= 1; --i)
        lcp[i] = min(lcp[i + 1], height[i + 1]);

    // Towards larger ranks.
    for (int i = pivot + 1; i <= n; ++i)
        lcp[i] = min(lcp[i - 1], height[i]);
}
char s[MAXN];
bool ok(int k)
{
    int rk = rank[k];
    if(lcp[rk] == n - k) return true;
    return false;
}
int main()
{
    while(gets(s))
    {
        if(s[0] == '.') break;
        n = strlen(s);
        for(int i = 0; i <= n; i++) r[i] = s[i];
        da(r, sa, rank, height, n + 1, 130);
        getlcp();
        int tmp = (int)sqrt(n + 0.5);
        int ans = 0;
        for(int i = 1; i <= tmp; i++)
        {
            if(n % i != 0) continue;
            if(ok(i)) ans = max(ans, n / i);
            if(ok(n / i)) ans = max(ans, i);
        }
        printf("%d\n", ans);
    }
    return 0;
}


 

 

posted @ 2013-10-14 09:22  Class Xman  阅读(194)  评论(0)    收藏  举报
刷新页面返回顶部
博客园  ©  2004-2025
浙公网安备 33010602011771号 浙ICP备2021040463号-3