后缀数组的学习不得不告一段落了,还有好多要学的东西呢,最重要的是没有那么多时间了,人生苦短啊!
总的来说学得还比较顺利,除了没有深入研究以外个人感觉还行,做题的时候发现了一个奇怪的现象,别人认为很简单的题我就是做不会,别人认为很难的题我却能很快找到思路,比如pku3294就是后者,好吧,就从这儿说起。
这一题的意思是给你n个字符串,让你求最长的、在超过一半的串中出现的子串。一看到这个题就想到了height数组分组,先二分答案,找到最大的len值,然后再输出满足len的所有组的子串(很简单的思路,不知道为啥网上给这道题定位为难,感觉3729的思路都比它难想)。
想想也没啥好说的了,代码如下:
#pragma warning(disable:4786)
#include<stdio.h>
#include<string.h>
#include<string>
#include<iostream>
#include<set>
using namespace std;
// Only cleared at the end of each test case in main(); nothing in this file inserts into it.
set<string> s;
// Size of the counting-sort bucket array bar[]; get_sa() starts with keys in 0..maxm-2.
#define maxm 110000
// Capacity of every per-position array (concatenated text length).
#define maxn 110000
// sa[]       suffix array written by get_sa().
// height[]   written by get_height(): height[k] is the common-prefix length of
//            the suffixes at sa[k-1] and sa[k]; height[0] is set to 0.
// bar[]      counting-sort buckets used inside get_sa().
// Rank[], Rank_f[], Result_s[]  work buffers of get_sa(); Rank[] is later
//            overwritten by get_height() with the inverse of sa[].
int sa[maxn],height[maxn],bar[maxm],Rank[maxn],Rank_f[maxn],Result_s[maxn];
// visit[id]  stamp of the last group in which input string `id` was counted
//            (used by check() and solve()).
// map[pos]   1-based index of the input string that owns text position pos;
//            main() fills it for letter positions only, not for separators.
int visit[1005],map[maxn];
// Returns true when positions a and b carry the same key pair in r:
// r[a] == r[b] and r[a+len] == r[b+len]. The second pair is only read
// when the first pair matches.
bool cmp(int *r,int a,int b,int len)
{
    if (r[a] != r[b])
        return false;
    return r[a+len] == r[b+len];
}
// Builds the suffix array of r[0..n-1] into the global sa[] by repeatedly
// doubling the compared prefix length and radix-sorting on (first half,
// second half) rank pairs.
//   r  input symbols; each value is used as an index into bar[], so it must
//      lie in 0..maxm-2.
//   n  number of symbols in r.
// Uses the globals bar[], Rank[], Rank_f[] and Result_s[] as scratch space.
// NOTE(review): cmp() reads the old rank at index sa[i]+j, which can be past
// n-1; this presumably relies on the unique 0 terminator main() appends and
// on the zero-initialised global buffers - confirm before reusing elsewhere.
void get_sa(int *r,int n)
{
int i,j,p,*rank = Rank,*rank_f = Rank_f,*result_s = Result_s,*t,m = maxm-2;
// Initial counting sort of all positions by their single symbol.
for (i = 0; i <= m; i++) bar[i] = 0;
for (i = 0; i < n; i++) bar[rank[i] = r[i]]++;
for (i = 0; i< m; i++) bar[i+1] += bar[i];
// Walking backwards keeps positions with equal keys in increasing order.
for (i = n-1; i>= 0; i--) sa[--bar[rank[i]]] = i;
// j is the current half-length; p counts distinct ranks after each round and
// becomes the key range m for the next round. Stop once all n ranks differ.
for (j = 1,p = 1; p < n; j *= 2,m = p){
// Order positions by their second key: the last j positions have no second
// half and go first ...
for (p = 0,i = n-j; i < n; i++) result_s[p++] = i;
// ... the rest follow in the order of the suffix that starts j symbols later.
for (i = 0; i< n; i++) if (sa[i] >= j) result_s[p++] = sa[i] -j;
// rank_f[i] is the first key of the i-th position in second-key order.
for (i = 0; i < n; i++) rank_f[i] = rank[result_s[i]];
// Counting sort on the first key, preserving the second-key order.
for (i = 0; i<=m; i++) bar[i] = 0;
for (i = 0; i< n; i++) bar[rank_f[i]]++;
for (i = 0; i< m; i++) bar[i+1] += bar[i];
for (i = n-1; i >= 0; i--) sa[--bar[rank_f[i]]] = result_s[i];
// Swap buffers: result_s now holds the previous ranks, rank receives new ones.
t = result_s; result_s = rank; rank = t;
// Neighbours in sa[] with identical (first, second) old ranks share a rank.
for (rank[sa[0]] = 0,i = 1,p = 1; i < n; i++)
rank[sa[i]] = cmp(result_s,sa[i],sa[i-1],j)?p-1:p++;
}
}
void get_height(int *r,int n)
{
int i,j,*rank = Rank,len = 0;
for (i = 0; i< n; i++) rank[sa[i]] = i;
height[0] = 0;
for (i = 0; i < n-1; i++){
if (len != 0) len--;
for (j = sa[rank[i] -1]; r[j+len] == r[i+len]; len++);
height[rank[i]] = len;
}
}
// Reports whether some run of consecutive suffix-array entries 1..len-1 with
// height >= mid touches at least n different input strings (as identified by
// map[]). visit[] is reset on entry and stores, per string, the stamp of the
// run that last counted it.
// The threshold is only tested after an entry with height >= mid, so a run
// consisting of a single suffix never yields true.
bool check(int mid,int len,int n)
{
    memset(visit,0,sizeof(visit));

    int stamp = 1;      // identifies the current run
    int distinct = 1;   // strings counted in the current run
    visit[map[sa[1]]] = stamp;

    for (int pos = 2; pos < len; ++pos) {
        int &mark = visit[map[sa[pos]]];
        if (height[pos] < mid) {
            // This suffix opens a new run and is its first member.
            ++stamp;
            distinct = 1;
            mark = stamp;
            continue;
        }
        if (mark != stamp) {
            mark = stamp;
            ++distinct;
        }
        if (distinct >= n)
            return true;
    }
    return false;
}
// Writes the mid symbols starting at r[start] as lowercase letters
// (symbol 1 -> 'a'), followed by a newline.
static void print_group_prefix(const int *r,int start,int mid)
{
    for (int j = start; j < start + mid; j++)
        printf ("%c",r[j] - 1 + 'a');
    printf ("\n");
}
// Prints, one per line and in suffix-array order, the length-mid prefix of
// every run of consecutive suffix-array entries 1..len-1 with height >= mid
// that touches at least n different input strings (as identified by map[]).
//   r    the concatenated text the suffix array was built from
//   mid  substring length to report
//   len  one past the last suffix-array index to scan
//   n    minimum number of distinct strings a run must cover
// visit[] is reset on entry and used as a per-string stamp.
// NOTE(review): a run made of a single suffix is printed when n <= 1 without
// checking that the suffix really holds mid letters; callers presumably pass
// n >= 2.
void solve(int *r,int mid,int len,int n)
{
    int num,i,k = 1;
    memset(visit,0,sizeof(visit));
    num = 1; visit[map[sa[1]]] = 1;
    for (i = 2; i < len; i++){
        if (height[i] >= mid){
            if (visit[map[sa[i]]] != k) {num++; visit[map[sa[i]]] = k;}
        }
        else{
            // Entry i starts a new run, so the run ending at i-1 is complete.
            if (num >= n) print_group_prefix(r,sa[i-1],mid);
            num = 1; k++; visit[map[sa[i]]] = k;
        }
    }
    // The final run is not followed by a smaller height inside the scanned
    // range, so it has to be reported here; without this the
    // lexicographically largest answer was silently dropped.
    if (len >= 2 && num >= n) print_group_prefix(r,sa[len-1],mid);
}
int main()
{
int n,i,j,k,len,r[110000],max = 0;
char str[1100]; bool f=false;
while (scanf ("%d",&n) && n){
if (f) printf ("\n");
else f = true;
for (j = 0,i = 1; i<= n; i++){
memset(str,0,sizeof(str));
scanf ("%s",&str); len = strlen(str); max = max>len?max:len;
if (i > 1) r[j++] = 1000+i;
for (k = 0; k < len; k++){
r[j++] = str[k] - 'a' + 1;
map[j-1] = i;
}
}
r[j++] = 0; len = j;
get_sa(r,len); get_height(r,len); len = len-n+1;
int start = 1,end = max,mid,sum = n/2+1;//二分查找函数
while (start <= end){
mid = (start+end)/2;
if (check(mid,len,sum)) start = mid+1;
else end = mid-1;
}
if (end == 0) printf ("?\n");
else solve(r,end,len,sum);
s.clear();
}
return 0;
}
浙公网安备 33010602011771号