POJ 3294 Life Forms

题目大意:

给出N(N不超过100)个字符串, 求出最长的字符串, 使它是超过N / 2个字符串的子串. 若最长的答案有多个, 则按字典序全部输出; 若不存在这样的字符串, 则输出一个问号.

 

简要分析:

显然还是可以用哈希搞, 但是哈希这种办法太落后了, 而且本身并不是个完美的算法, 因为不可能完美哈希. 于是学习了后缀数组.

简单说一下后缀数组, 它把长为L的字符串的L个后缀排序了, sa[i]记录排第i名的后缀是哪一个, rank[i]记录i这个后缀的排名, height[i]记录排名为i的后缀与排名为i - 1的后缀的最长公共前缀(LCP). 这些东西的算法在论文里面都有, 详见2009年国家集训队论文集. 一个NB的结论是, 求i和j两个后缀的最长公共前缀, 不妨设rank[i] < rank[j], 则答案为height中下标从rank[i] + 1到rank[j]的最小值. 脑补一下, height是按后缀的字典序来的, 求LCP的过程有点像渐变, 于是就是最小值了.

先把所有字符串连起来, 中间用一个不会在原串中出现的符号作为分隔, 求一次后缀数组. 然后二分答案M, 再把height数组分组, 使得每一组内的后缀两两间的LCP都不小于M, 这样分出来的每一组一定是排名上连续的一个区间, 因为上边提到两个后缀的LCP就是height数组上一段区间的最小值. 然后只需统计每一组内后缀的不同来源(即属于哪个串)的个数是否超过N / 2即可. 注意统计时要保证该后缀的前M个字符没有跨越分隔符, 即全部落在同一个原串内.

 

代码实现:

View Code
  1 #include <cstdio>
2 #include <cstdlib>
3 #include <cstring>
4 #include <vector>
5 #include <algorithm>
6 using namespace std;
7
// Separator inserted between consecutive input strings in the concatenation.
const char spl[] = "#";

// Capacity limits assumed by the buffers below.
const int MAX_N = 100;    // maximum number of input strings
const int LEN = 1000;     // maximum length of a single input string
// Longest possible concatenation: MAX_N full-length strings plus the
// MAX_N - 1 separators between them.
const int MAX_LEN = MAX_N * LEN + MAX_N - 1;

int n;                       // number of strings in the current test case
int sz;                      // length of the concatenated text buf[1..sz]
int sa[MAX_LEN + 1];         // sa[r]: start position of the suffix ranked r
int rank[MAX_LEN + 1];       // rank[p]: rank of the suffix starting at p
int height[MAX_LEN + 1];     // height[r]: LCP of the suffixes ranked r and r - 1
int cnt[MAX_LEN + 1];        // bucket counters for the counting sort
char tmp[LEN + 2];           // one input string, stored from index 1
char buf[MAX_LEN + 2];       // concatenated text, stored from index 1
int belong[MAX_LEN + 1];     // belong[p]: 1-based index of the string owning
                             // position p, or 0 when p holds a separator
std::vector<int> ans;        // start positions of the answers to print
14
15 struct node_t {
16 int v[2], p;
17 bool operator == (const node_t &t) const {
18 return v[0] == t.v[0] && v[1] == t.v[1];
19 }
20 } nd[MAX_LEN + 1], tp[MAX_LEN + 1];
21
22 void ra(int b) {
23 for (int i = 1; i >= 0; i --) {
24 memset(cnt, 0, sizeof(int) * (b + 1));
25 for (int j = 1; j <= sz; j ++) cnt[nd[j].v[i]] ++;
26 for (int j = 1; j <= b; j ++) cnt[j] += cnt[j - 1];
27 for (int j = sz; j >= 1; j --) tp[cnt[nd[j].v[i]] --] = nd[j];
28 memcpy(nd, tp, sizeof(node_t) * (sz + 1));
29 }
30 for (int i = 1, j = 1, k = 1; i <= sz; i = j, k ++)
31 while (j <= sz && nd[j] == nd[i]) rank[nd[j ++].p] = k;
32 }
33
34 bool check(int len, bool t) {
35 static bool vis[MAX_N + 1];
36 for (int i = 1; i <= sz; ) {
37 memset(vis, 0, sizeof(vis));
38 int j = i;
39 while (j + 1 <= sz && height[j + 1] >= len) {
40 if (belong[sa[j]] == belong[sa[j] + len - 1])
41 vis[belong[sa[j]]] = 1;
42 j ++;
43 }
44 if (belong[sa[j]] == belong[sa[j] + len - 1])
45 vis[belong[sa[j]]] = 1;
46 int cnt = 0;
47 for (int k = 1; k <= n; k ++) cnt += vis[k];
48 if (cnt > n / 2) {
49 if (!t) return 1;
50 ans.push_back(sa[i]);
51 }
52 i = j + 1;
53 }
54 return 0;
55 }
56
57 int main() {
58 bool fir = 1;
59 while (scanf("%d", &n) != EOF && n) {
60 if (fir) fir = 0;
61 else printf("\n");
62 memset(buf, 0, sizeof(buf));
63 int minlen = ~0U >> 1;
64 for (int i = 0; i < n; i ++) {
65 if (i) strcat(buf + 1, spl);
66 scanf("%s", tmp + 1);
67 minlen = min(minlen, (int)strlen(tmp + 1));
68 strcat(buf + 1, tmp + 1);
69 }
70 sz = strlen(buf + 1);
71 memset(belong, 0, sizeof(belong));
72 for (int i = 1, j = 1; i <= sz; i ++) {
73 if (buf[i] == '#') j ++;
74 else belong[i] = j;
75 }
76 for (int i = 1; i <= sz; i ++) nd[i].v[0] = buf[i], nd[i].v[1] = 0, nd[i].p = i;
77 ra(255);
78 for (int s = 1; s < sz; s <<= 1) {
79 for (int i = 1; i <= sz; i ++) {
80 nd[i].v[0] = rank[i], nd[i].v[1] = i + s <= sz ? rank[i + s] : 0;
81 nd[i].p = i;
82 }
83 ra(sz);
84 }
85 for (int i = 1; i <= sz; i ++) sa[rank[i]] = i;
86 for (int i = 1, j, k = 0; i <= sz; height[rank[i ++]] = k)
87 for (k ? k -- : 0, j = sa[rank[i] - 1]; buf[i + k] == buf[j + k]; k ++);
88
89 int lb = 0, rb = minlen + 1;
90 while (lb + 1 < rb) {
91 int mid = (lb + rb) >> 1;
92 if (check(mid, 0)) lb = mid;
93 else rb = mid;
94 }
95 if (lb) {
96 ans.clear();
97 check(lb, 1);
98 for (int i = 0, b = ans.size(); i < b; i ++) {
99 if (belong[ans[i]] != belong[ans[i] + lb - 1]) continue;
100 for (int j = 0; j < lb; j ++) printf("%c", buf[ans[i] + j]);
101 printf("\n");
102 }
103 }
104 else printf("?\n");
105 }
106 return 0;
107 }
posted @ 2012-03-09 22:57  zcwwzdjn  阅读(912)  评论(0编辑  收藏  举报