HDU 4622 Reincarnation 后缀自动机 // BKDRHash(最优hash)

Reincarnation

Time Limit: 6000/3000 MS (Java/Others)    Memory Limit: 131072/65536 K (Java/Others)

Problem Description
Now you are back, and have a task to do:
Given a string s consisting of lower-case English letters only, denote f(s) as the number of distinct substrings of s.
You are also given some queries; for each query you should calculate f(s[l...r]), where s[l...r] is the substring of s that starts at l and ends at r.
 

 

Input
The first line contains an integer T (1<=T<=5), the number of test cases.
For each test case, the first line contains a string s (1 <= length of s <= 2000).
Denote the length of s by n.
The second line contains an integer Q (1 <= Q <= 10000), the number of queries.
Then Q lines follow; each line contains two integers l, r (1 <= l <= r <= n), denoting a query.
 

 

Output
For each test case, print the answer to each query on its own line.
 

 

Sample Input
2
bbaba
5
3 4
2 2
2 5
2 4
1 4
baaba
5
3 3
3 4
1 4
3 5
5 5
 

 

Sample Output
3
1
7
5
8
1
3
8
5
1
Hint
I won't do anything against hash because I am nice. Of course, this problem has a solution that doesn't rely on hashing.
 

题意:

  给你一个母串 s,

  以及 Q 个询问,每次询问 s[L..R] 这一段中不同子串的个数是多少

题解:

  考虑离线

  把询问按左端点 L 分类(先按 L、再按 R 排序),相同 L 的询问划分为一类;对每一类从位置 L 开始重新建后缀自动机,按 R 从小到大逐个追加字符,同时维护当前不同子串的个数

  这样最多就是建立 2000 个后缀自动机了

#include <bits/stdc++.h>
/// Reads the next decimal integer from stdin.
///
/// Every non-digit character before the number is skipped; if a '-' is seen
/// anywhere in that skipped prefix the result is negated. The first character
/// after the digits is consumed as well.
///
/// @return the parsed value, or 0 if end of input is reached before any digit.
inline long long read(){
    long long value = 0, sign = 1;
    // Keep the character in an int: getchar() returns EOF as an int, and
    // without the EOF check below the skip loop would never terminate once
    // the input is exhausted.
    int ch = std::getchar();
    while (ch != EOF && (ch < '0' || ch > '9')) {
        if (ch == '-') sign = -1;
        ch = std::getchar();
    }
    while (ch >= '0' && ch <= '9') {
        value = value * 10 + (ch - '0');
        ch = std::getchar();
    }
    return value * sign;
}
using namespace std;

// Base size for the arrays below; presumably the maximum string length (2000)
// plus a small margin. State-indexed arrays are sized 2*N.
const int N = 2e3+7;

// NOTE(review): not referenced anywhere in the visible code.
const long long mod = 1000000007;

// Running sum of (maxlen - minlen + 1) over the states of the automaton being
// built; add_char keeps it up to date and main reads it as the query answer.
long long now;
// isPlus[z]: set to 1 for every state add_char creates directly (never for clones).
// endpos: NOTE(review): not referenced anywhere in the visible code.
// d[v]: incremented when a transition into v is installed and decremented when
//       one is redirected away; only written, never read, in the visible code.
int isPlus[N * 2],endpos[N * 2];int d[N * 2];
// tot:    index of the most recently allocated state (states are numbered from 1).
// slink:  suffix link of each state (0 = none).
// trans:  trans[s][c] is the state reached from s on letter 'a'+c (0 = no transition).
// minlen / maxlen: shortest / longest substring length represented by each state.
// pre:    the state main passes to add_char for the next character
//         (reset to 1 by init_sam, then the return value of the last add_char).
int tot,slink[2*N],trans[2*N][28],minlen[2*N],maxlen[2*N],pre;
// Allocates the next automaton state and returns its index.
//   _maxlen / _minlen : length bounds stored for the new state
//   _trans            : optional row of 26 transitions to copy (NULL = copy nothing);
//                       d[] is bumped once for every copied target, including 0
//   _slink            : suffix link stored for the new state
int newstate(int _maxlen,int _minlen,int* _trans,int _slink){
    const int id = ++tot;
    maxlen[id] = _maxlen;
    minlen[id] = _minlen;
    slink[id] = _slink;
    if (_trans != NULL) {
        for (int c = 0; c < 26; ++c) {
            const int target = _trans[c];
            trans[id][c] = target;
            d[target] += 1;
        }
    }
    return id;
}
// Contribution of state u to the distinct-substring count: the number of
// lengths in the inclusive range minlen[u]..maxlen[u].
long long update(int u) {
    const int lengths = maxlen[u] - minlen[u] + 1;
    return static_cast<long long>(lengths);
}
// Appends letter ch to the automaton, where u is the state reached by the
// whole string so far. Returns the state for the extended string and keeps
// the global `now` equal to the sum of update(s) over all affected states.
int add_char(char ch,int u){
    const int sym = ch - 'a';
    const int cur = newstate(maxlen[u] + 1, -1, NULL, 0);
    isPlus[cur] = 1;

    // Walk the suffix links, adding the missing transition on sym.
    int p = u;
    for (; p && !trans[p][sym]; p = slink[p]) {
        trans[p][sym] = cur;
        d[cur] += 1;
    }

    // Case 1: walked off the root -- every suffix is new, link to the root.
    if (!p) {
        minlen[cur] = 1;
        slink[cur] = 1;
        now += update(cur);
        return cur;
    }

    // Case 2: the existing target is exactly one longer than p -- link to it.
    const int q = trans[p][sym];
    if (maxlen[p] + 1 == maxlen[q]) {
        slink[cur] = q;
        minlen[cur] = maxlen[q] + 1;
        now += update(cur);
        return cur;
    }

    // Case 3: split q by cloning it with the shorter maxlen.
    const int clone = newstate(maxlen[p] + 1, -1, trans[q], slink[q]);
    now -= update(q);                 // q's length range is about to shrink
    slink[cur] = slink[q] = clone;
    minlen[q] = minlen[cur] = maxlen[clone] + 1;
    now += update(q);
    for (; p && trans[p][sym] == q; p = slink[p]) {
        trans[p][sym] = clone;
        --d[q];
        ++d[clone];
    }
    minlen[clone] = maxlen[slink[clone]] + 1;
    now += update(clone);
    now += update(cur);
    return cur;
}
// Resets the automaton to a single root state (index 1).
// Only the transition rows of states 1..tot are cleared, i.e. the rows the
// previous build could have written; the other per-state arrays are left as is.
void init_sam() {
    for (int state = tot; state >= 1; --state) {
        int* row = trans[state];
        for (int c = 0; c < 26; ++c) row[c] = 0;
    }
    tot = 1;
    pre = 1;
}
// T: number of test cases. n: number of queries in the current test case
// (main reads it right after the string; it is not the string length).
int T,n;
// ans[id]: answer for the query that appeared id-th in the input (1-based).
long long ans[20000];
// The input string, stored 1-based (main reads it into a+1).
char a[N * 2];
// One query on the inclusive range [L, R]; id is its 1-based input position.
// Q[1..n] hold the queries; Q[0] is never written, so Q[0].L stays 0.
struct ss{int L,R,id;}Q[20000];
// Ordering for std::sort: by left endpoint first, then by right endpoint.
// Returns non-zero when s1 must come before s2.
int cmp(ss s1,ss s2) {
    return s1.L != s2.L ? s1.L < s2.L : s1.R < s2.R;
}
int main() {
    scanf("%d",&T);
    while(T--) {
        scanf("%s%d",a+1,&n);
        for(int i = 1; i <= n; ++i)
            scanf("%d%d",&Q[i].L,&Q[i].R),Q[i].id = i;
        sort(Q+1,Q+n+1,cmp);
        int l = 1,r = 0;
        for(int i = 1; i <= n; ++i) {
            if(Q[i].L != Q[i-1].L) {init_sam();
                l = Q[i].L,r = l-1;
                now = 0;
            }
            while(r < Q[i].R){
                pre = add_char(a[(++r)],pre);
            }
            ans[Q[i].id] = now;
        }
        for(int i = 1; i <= n; ++i) printf("%lld\n",ans[i]);
    }
    return 0;
}

 

 

BKDRHash

#include <cstdio>
#include <cstdlib>
#include <cstring>
typedef unsigned long long int ULL;
// BKDRHash (the author calls it the best string hash); the hash value starts at 0.
const int seed = 13131; // 31 131 1313 13131 131313 etc..
// Array size: 2000 plus a margin of 10.
const int maxn = 2000+10;
// Input string, stored 1-based (work() reads it into str+1).
char str[maxn];
ULL powseed[maxn]; // powseed[i] = seed^i. Overflow is harmless: ULL arithmetic simply wraps, and sumHash wraps the same way. (The author states the wrapped value still identifies the string; NOTE(review): collisions are possible in principle.)
ULL sumHash[maxn]; // prefix hash values
int ans[maxn][maxn]; // ans[L][R] is the answer: the number of distinct substrings inside the range [L,R]
// Modulus used by StringHash::insert to pick a bucket (val % MOD).
const int MOD = 10007;
// Chained hash table keyed by 64-bit substring hashes. For every stored hash
// it remembers the most recent start position that produced it.
struct StringHash
{
    int first[MOD+2],num;       // first[b]: head entry of bucket b (0 = empty); num: entries in use
    ULL EdgeNum[maxn];          // EdgeNum[i]: the exact hash value stored in entry i
    int next[maxn],close[maxn]; // next[i]: next entry in the same bucket;
                                // close[i]: latest start position seen for entry i's hash.
    // Example: in "baba" with length 2, "ba" is first seen at start 1 (close = 1);
    // when "ba" shows up again, close becomes 3.

    // Empties the table. Entries are simply overwritten as num grows again.
    void init ()
    {
        num = 0;
        for (int b = 0; b < MOD+2; ++b) first[b] = 0;
    }

    // Records that hash `val` occurs at start position `id`.
    // Returns the previously recorded start position for `val`,
    // or 0 if `val` was not in the table yet.
    int insert (ULL val,int id)
    {
        const int bucket = val % MOD;
        // Sharing a bucket does not mean the value was seen: only an exact
        // match on the full hash value counts.
        int node = first[bucket];
        while (node)
        {
            if (EdgeNum[node] == val)
            {
                const int previous = close[node];
                close[node] = id;   // remember the newest start position
                return previous;
            }
            node = next[node];
        }
        // Not present: push a new entry at the front of the bucket's chain.
        const int fresh = ++num;
        EdgeNum[fresh] = val;
        close[fresh] = id;
        next[fresh] = first[bucket];
        first[bucket] = fresh;
        return 0;
    }
}H;
void work ()
{
    scanf ("%s",str+1);
    int lenstr = strlen(str+1);
    for (int i=1;i<=lenstr;++i)
        sumHash[i] = sumHash[i-1]*seed + str[i];
    memset(ans,0,sizeof(ans));
    for (int L=1;L<=lenstr;++L) //暴力枚举子串长度
    {
        H.init();
        for (int i=1;i+L-1<=lenstr;++i)
        {
            int pos = H.insert(sumHash[i+L-1]-powseed[L]*sumHash[i-1],i);
            ans[i][i+L-1] ++;//ans[L][R]++,自己是一个
            ans[pos][i+L-1]--;//pos放回0是没用的
    
        }
    }
    for (int i = lenstr; i>=1; i--)
    {
        for (int j=i;j<=lenstr;j++)
        {
            ans[i][j] += ans[i+1][j]+ans[i][j-1]-ans[i+1][j-1];
        }
    }
    int m;
    scanf ("%d",&m);
    while (m--)
    {
        int L,R;
        scanf ("%d%d",&L,&R);
        printf ("%d\n",ans[L][R]);
    }
    return ;
}
// Entry point of the hashing solution: precomputes the powers of the hash
// seed once, then runs work() for every test case.
int main ()
{
    // Fill the whole table. Stopping at maxn-20 (= 1990) left
    // powseed[1991..2000] at zero although strings may be 2000 characters
    // long, so hashes of substrings longer than 1990 lost the term that
    // removes the prefix and equal substrings were counted as distinct.
    powseed[0] = 1;
    for (int i = 1; i < maxn; ++i) powseed[i] = powseed[i-1] * seed;
    int t;
    scanf ("%d",&t);
    while (t--) work();
    return 0;
}

 

posted @ 2017-09-30 15:20  meekyan  阅读(291)  评论(0编辑  收藏  举报