hdu 6194 沈阳网络赛--string string string(后缀数组)

题目链接

 

Problem Description
Uncle Mao is a wonderful ACMER. One day he met an easy problem, but Uncle Mao was so lazy that he left the problem to you. I hope you can give him a solution.
Given a string s, we define a substring that happens exactly k times as an important string, and you need to find out how many substrings which are important strings.
 

 

Input
The first line contains an integer T ($T \le 100$) implying the number of test cases.
For each test case, there are two lines:
the first line contains an integer k ($k \ge 1$) which is described above;
the second line contains a string s ($\text{length}(s) \le 10^5$).
It's guaranteed that $\sum \text{length}(s) \le 2 \times 10^6$.
 

 

Output
For each test case, print the number of the important substrings in a line.
 

 

Sample Input
2
2
abcabc
3
abcabcabcabc
 

 

Sample Output
6
9
 
 
题意:有一个字符串s,求其中恰好出现k次的子串有多少个?
 
思路:后缀数组,通过后缀数组算法可以知道每个后缀的排名,如果有某个子串恰好出现k次,那么必定有k个对应的后缀 即这个子串是这k个后缀串的前缀,那么这k个后缀串的排名一定是连续的,所以我们按排名从1~len(s)依次开始 取连续k个后缀串,可以根据height[]数组快速算出当前这k个后缀串的最大公共前缀长度len,那么长为1到len的前缀子串,这k个串都含有,设当前开始k个串为 i到i+k-1 ,那么如果子串长过短,可能 i-1 或 i+k 这个串也含有相应的子串,所以计算出 i 和 i-1 串,i+k和i+k-1的最大公共前缀长为m,那么之前取的子串长必须大于m才能保证 i-1 和 i+k 不含有相应的子串,只有i~i+k-1这k个串含有相应的子串。
 
代码如下:
#include <iostream>
#include <algorithm>
#include <cstdio>
#include <cstring>
#include <cmath>
using namespace std;
// 64-bit integer alias.
typedef long long LL;
// Capacity of every per-position array below (1e5 plus a small margin).
const int N=1e5+5;
// Input string of the current test case (filled by scanf in main).
char s[N];
// Required occurrence count of the current test case (read in main).
int k;
// Scratch buffers for da(): wa/wb are the two alternating key arrays,
// wv holds the keys listed in second-key order, wss is the counting-sort bucket array.
int wa[N],wb[N],wv[N],wss[N];
// sa[i]     : start position of the suffix with rank i (written by da()).
// ran[p]    : rank of the suffix starting at position p (written by callheight()).
// height[i] : common-prefix length of the suffixes ranked i-1 and i (written by callheight()).
int sa[N],ran[N],height[N];
// f[i][s] : minimum of height[i .. i+2^s-1]; sparse table built by init() and queried by cal().
int f[N][20];

// Tells whether positions a and b carry the same (first, second) key pair in r.
// Returns 1 when r[a]==r[b] and r[a+l]==r[b+l], otherwise 0.
// The second pair is only read when the first pair already matches.
int cmp(int *r,int a,int b,int l)
{
    if(r[a]!=r[b]) return 0;
    if(r[a+l]!=r[b+l]) return 0;
    return 1;
}
void da(char *r,int *sa,int n,int m)
{
    int i,j,p,*x=wa,*y=wb,*t;
    for(i=0; i<m; i++) wss[i]=0;
    for(i=0; i<n; i++) wss[x[i]=(int)r[i]]++;
    for(i=1; i<m; i++) wss[i]+=wss[i-1];
    for(i=n-1; i>=0; i--) sa[--wss[x[i]]]=i;
    for(j=1,p=1; p<n; j*=2,m=p)
    {
        for(p=0,i=n-j; i<n; i++) y[p++]=i;
        for(i=0; i<n; i++) if(sa[i]>=j) y[p++]=sa[i]-j;

        for(i=0; i<n; i++) wv[i]=x[y[i]];
        for(i=0; i<m; i++) wss[i]=0;
        for(i=0; i<n; i++) wss[wv[i]]++;
        for(i=1; i<m; i++) wss[i]+=wss[i-1];
        for(i=n-1; i>=0; i--) sa[--wss[wv[i]]]=y[i];

        for(t=x,x=y,y=t,p=1,x[sa[0]]=0,i=1; i<n; i++)
            x[sa[i]]=cmp(y,sa[i-1],sa[i],j)?p-1:p++;
    }
    return;
}
// Fills the global arrays ran[] and height[] from a finished suffix array.
//   r  : the text; comparison stops at the first differing character.
//   sa : suffix array; entries sa[1..n] are read for the ranks, and
//        sa[rank-1] gives the predecessor of each suffix.
//   n  : number of text positions to process.
// After the call ran[sa[i]]==i for i in 1..n, and height[ran[p]] is the length
// of the common prefix of the suffix at p and the suffix ranked just before it.
void callheight(char *r,int *sa,int n)
{
    for(int rk=1; rk<=n; ++rk)
    {
        ran[sa[rk]]=rk;
    }

    int h=0;   // common-prefix length carried over from the previous position
    for(int pos=0; pos<n; ++pos)
    {
        // Moving one position forward can shorten the match by at most one.
        if(h) h--;
        const int prev=sa[ran[pos]-1];
        while(r[pos+h]==r[prev+h]) h++;
        height[ran[pos]]=h;
    }
}
// Builds the sparse table f over height[1..len] for range-minimum queries.
// f[pos][lvl] ends up as the minimum of height[pos .. pos+2^lvl-1].
void init(int len)
{
    // Level 0: every window of length 1 is the element itself.
    for(int pos=1; pos<=len; ++pos)
        f[pos][0]=height[pos];

    // Level lvl: combine the two half-windows of level lvl-1.
    for(int lvl=1; (1<<lvl)<=len; ++lvl)
    {
        const int span=1<<lvl;
        const int half=span/2;
        for(int pos=1; pos+span-1<=len; ++pos)
        {
            const int left=f[pos][lvl-1];
            const int right=f[pos+half][lvl-1];
            f[pos][lvl]=(right<left)?right:left;
        }
    }
}
// Range-minimum query on the sparse table f: returns the minimum of
// height[l..r] (both ends inclusive). Requires l<=r and a table built by init()
// that covers the range.
//
// The level is floor(log2(r-l+1)), found with integer arithmetic only, so the
// result never depends on how a floating-point log2 rounds near powers of two.
int cal(int l,int r)
{
    const int span=r-l+1;
    int lvl=0;
    while((1<<(lvl+1))<=span) ++lvl;

    // Two overlapping windows of length 2^lvl cover [l, r] completely.
    const int left=f[l][lvl];
    const int right=f[r-(1<<lvl)+1][lvl];
    return (right<left)?right:left;
}
// Reads T test cases (k, then the string s) and prints, for each one, how many
// distinct substrings of s occur exactly k times.
//
// For every window of k consecutive ranks [i, j] in the suffix array:
//   x   = longest prefix shared by all k suffixes of the window,
//   tmp = longest prefix the window shares with either outside neighbour,
// so the prefixes of length tmp+1 .. x occur in exactly these k suffixes.
int main()
{
    int T=0;
    if(scanf("%d",&T)!=1) return 0;
    while(T--)
    {
        // Stop on truncated input instead of reprocessing stale data.
        if(scanf("%d%s",&k,s)!=2) break;
        const int len=(int)strlen(s);

        // len+1 so that the terminating '\0' acts as the smallest sentinel.
        da(s,sa,len+1,130);
        callheight(s,sa,len);
        init(len);

        // The count can reach about len*(len+1)/2 (roughly 5e9 for len=1e5),
        // which does not fit into a 32-bit int.
        long long ans=0;

        // Written as i<=len-k+1 rather than i+k-1<=len so that a very large k
        // cannot overflow the comparison.
        for(int i=1;i<=len-k+1;i++)
        {
            const int j=i+k-1;

            // Longest prefix shared with the suffix just before / just after the window.
            int tmp=height[i];
            if(j+1<=len && height[j+1]>tmp) tmp=height[j+1];

            // Longest prefix common to the whole window; a window of a single
            // suffix shares its full length with itself.
            int x;
            if(k!=1) x=cal(i+1,j);
            else x=len-sa[i];

            if(x>tmp) ans+=x-tmp;
        }
        printf("%lld\n",ans);
    }
    return 0;
}

 

 
 
posted @ 2017-09-11 18:58 茶飘香~ 阅读(...) 评论(...) 编辑 收藏