spoj694 DISUBSTR - Distinct Substrings

Given a string, we need to find the total number of its distinct substrings.

Input

T- number of test cases. T<=20;
Each test case consists of one string, whose length is <= 1000

Output

For each test case output one number saying the number of distinct substrings.

Example

Sample Input:
2
CCCCC
ABABA

Sample Output:
5
9

Explanation for the testcase with string ABABA: 
len=1 : A,B
len=2 : AB,BA
len=3 : ABA,BAB
len=4 : ABAB,BABA
len=5 : ABABA
Thus, total number of distinct substrings is 9.

题目大意:给定一个字符串,求不同的子串数,子串是连续的.

分析:每个子串是对应后缀的前缀,利用后缀数组.

   求出sa和ht数组. 对于每个sa[i],它能和它本身以及后面的字符形成子串,如果固定sa[i]为子串的左端点(下标从1开始),那么它能形成n - sa[i] + 1个子串. 所有的子串加起来等于Σ(n - sa[i] + 1) = n*(n + 1) / 2.

   这样统计会将某些子串重复统计. 因为ht[i]计算的是排好序后两个相邻后缀(sa[i - 1]和sa[i])的LCP,如果有重叠部分,那么一定是最大的. 对于每一个sa[i],它会重复计算ht[i]个子串(固定了左端点嘛,这一段的右端点也是一样的,那么就会重复计算了). 减掉就好了,即答案 = n*(n + 1) / 2 - Σht[i].

#include <cstdio>
#include <cstring>
#include <iostream>
#include <algorithm>

using namespace std;

// Capacity of every work array below (all of them are indexed from 1).
const int maxn = 2010;

// n   : length of the current string; ans : result printed by main().
int n, ans;
// fir / sec : first and second sort keys of each suffix in one doubling round.
// pos       : suffix start indices ordered by descending second key.
int fir[maxn], sec[maxn], pos[maxn];
// sa[r]  : start index of the suffix with rank r.
// rk[i]  : rank of the suffix starting at i.
// tong   : bucket counters for the counting sorts.
// ht[r]  : common-prefix length of the suffixes ranked r-1 and r (ht[1] = 0).
int sa[maxn], rk[maxn], tong[maxn], ht[maxn];
// sett : sorted distinct character codes; a[i] : 1-based index of s[i] in sett.
int sett[maxn], a[maxn];
int cnt, K, T;
// Input string, stored from s[1].
char s[maxn];

/**
 * Builds the suffix array (sa), rank array (rk) and height array (ht) of the
 * string s[1..n] by prefix doubling with two counting-sort passes per round,
 * then fills ht with a single linear sweep over the suffixes in text order.
 *
 * Reads the globals n and s; overwrites sa, rk, ht and the scratch arrays
 * fir, sec, pos, tong, sett, a and cnt.
 */
void solve()
{
    const int len = n;

    // Start every call from zeroed state so nothing leaks in from a previous,
    // possibly longer, string.
    fill(rk, rk + maxn, 0);
    fill(sa, sa + maxn, 0);
    fill(ht, ht + maxn, 0);
    fill(fir, fir + maxn, 0);
    fill(sec, sec + maxn, 0);
    fill(pos, pos + maxn, 0);
    fill(tong, tong + maxn, 0);

    // Compress the characters to 1..cnt.
    for (int i = 1; i <= len; ++i)
        sett[i] = s[i];
    sort(sett + 1, sett + len + 1);
    cnt = unique(sett + 1, sett + len + 1) - (sett + 1);
    for (int i = 1; i <= len; ++i)
    {
        a[i] = lower_bound(sett + 1, sett + cnt + 1, s[i]) - sett;
        ++tong[a[i]];
    }
    for (int i = 1; i <= len; ++i)
        tong[i] += tong[i - 1];

    // Initial rank: one more than the number of strictly smaller characters.
    for (int i = 1; i <= len; ++i)
        rk[i] = tong[a[i] - 1] + 1;

    for (int step = 1; step <= len; step <<= 1)
    {
        // Keys for this round: rank at i, and rank at i + step (0 past the end).
        for (int i = 1; i <= len; ++i)
        {
            fir[i] = rk[i];
            sec[i] = (i + step <= len) ? rk[i + step] : 0;
        }

        // Pass 1: counting sort on the second key, written out in descending
        // order so that pass 2 can fill its buckets from the back.
        fill(tong, tong + len + 1, 0);
        for (int i = 1; i <= len; ++i)
            ++tong[sec[i]];
        for (int i = 1; i <= len; ++i)
            tong[i] += tong[i - 1];
        for (int i = 1; i <= len; ++i)
        {
            const int slot = --tong[sec[i]];
            pos[len - slot] = i;
        }

        // Pass 2: counting sort on the first key; each bucket is filled from
        // its last slot downwards, which leaves second keys ascending inside it.
        fill(tong, tong + len + 1, 0);
        for (int i = 1; i <= len; ++i)
            ++tong[fir[i]];
        for (int i = 1; i <= len; ++i)
            tong[i] += tong[i - 1];
        for (int i = 1; i <= len; ++i)
        {
            const int idx = pos[i];
            int &bucketEnd = tong[fir[idx]];
            sa[bucketEnd] = idx;
            --bucketEnd;
        }

        // Re-rank: equal key pairs share a rank, otherwise the rank grows by one.
        bool allDistinct = true;
        int prev = 0;
        for (int i = 1; i <= len; ++i)
        {
            const int cur = sa[i];
            if (prev == 0)
                rk[cur] = 1;
            else if (fir[cur] != fir[prev] || sec[cur] != sec[prev])
                rk[cur] = rk[prev] + 1;
            else
            {
                rk[cur] = rk[prev];
                allDistinct = false;
            }
            prev = cur;
        }

        // Every suffix has its own rank, so the order is final.
        if (allDistinct)
            break;
    }

    // Height array: walk the suffixes in text order, carrying over the
    // previous match length minus one as the starting point for the next one.
    int match = 0;
    for (int i = 1; i <= len; ++i)
    {
        const int r = rk[i];
        if (r == 1)
            match = 0;
        else
        {
            if (match > 0)
                --match;
            const int j = sa[r - 1];
            while (i + match <= len && j + match <= len && a[i + match] == a[j + match])
                ++match;
        }
        ht[r] = match;
    }
}

/**
 * Reads the number of test cases T, then one string per test case, and prints
 * for each string n*(n+1)/2 minus the sum of ht[1..n] as filled in by solve().
 *
 * Input is read defensively: a missing or malformed T, or running out of
 * strings early, ends the program instead of re-processing stale data, and
 * the string read is width-limited so it cannot overrun the buffer.
 */
int main()
{
    if (scanf("%d",&T) != 1)
        return 0;
    while (T--)
    {
        // s has maxn (2010) slots; index 0 is unused and one slot is needed
        // for the terminating NUL, so at most 2008 characters may be stored.
        if (scanf("%2008s",s + 1) != 1)
            break;
        n = static_cast<int>(strlen(s + 1));
        solve();
        // Total count of (start, end) pairs, before removing the ht[i] terms.
        ans = n * (n + 1) / 2;
        for (int i = 1; i <= n; i++)
            ans -= ht[i];
        printf("%d\n",ans);
    }

    return 0;
}

 

posted @ 2018-03-11 20:48  zbtrs  阅读(147)  评论(0编辑  收藏  举报