UVA 11107 Life Forms——(多字符串的最长公共子串,后缀数组+LCP)

题意: 输入n个序列,求出一个最大长度的字符串,使得它在超过一半的DNA序列中连续出现。如果有多解,按照字典序从小到大输出所有解。

分析:这道题的关键是将多个字符串连接成一个串,方法是用互不相同的分隔符把所有原串拼接起来。接下来求出这个新串的后缀数组和 height 数组,然后二分答案:每次只需判断是否存在一个长度为 p 的串在超过一半的原串中出现过。判断方法是扫描一遍 height 数组,把它分成若干段:每当 height[i] < p 时,就开辟一个新段,并检查刚结束的那一段是否包含了来自超过 n/2 个不同原串的后缀;如果是,那么当前的 p 值满足条件(注意 n = 1 时要特判)。

详见代码:

#include <iostream>
#include <cstdio>
#include <algorithm>
#include <cstring>
#include <map>
#include <vector>
using namespace std;

// Sizing constants for the static buffers below: maxn dimensions the
// per-string arrays (sign[], flag[]), maxm dimensions the one-string input
// buffer s2[], and maxn*maxm dimensions everything indexed by text position.
const int maxn = 104;
const int maxm = 1005;
// The concatenated text: every input string followed by its separator.
char s[maxn*maxm];
// sa    : suffix array of s, filled by build_sa().
// t, t2 : the two rank buffers build_sa() swaps between rounds.
// c     : bucket counters for build_sa()'s counting sorts.
int sa[maxn*maxm], t[maxn*maxm], t2[maxn*maxm], c[maxn*maxm];

// Number of characters currently stored in s (the trailing '\0' excluded).
int N;
// Builds sa[0..N-1], the suffix array of s[0..N-1], by prefix doubling: the
// suffixes are first ordered by one character and then, round after round,
// by twice as many characters, using counting sorts throughout.
//
// m is the number of buckets for the first round. Each s[i] is used directly
// as an index into c[], and only c[0..m-1] is reset, so every character has
// to lie in [0, m).
//
// NOTE(review): the rank comparison near the end of each round reads
// y[sa[i]+k] without a bounds check. It is only evaluated when the first
// halves tie, which should keep the index below N as long as s ends with a
// character that occurs nowhere else (main appends a fresh separator after
// every string) -- confirm before reusing this with other inputs.
void build_sa(int m) {
    // x holds the current rank of every suffix, y is scratch space.
    int* x = t, *y = t2;
    // Round 0: counting sort by the first character, which also becomes the
    // initial rank.
    for(int i = 0; i < m; i++) c[i] = 0;
    for(int i = 0; i < N; i++) c[x[i] = s[i]]++;
    for(int i = 1; i < m; i++) c[i] += c[i-1];
    for(int i = N-1; i >= 0; i--) sa[--c[x[i]]] = i;
    for(int k = 1; k <= N; k <<= 1) {
        // Order the suffixes by their second half (the k characters starting
        // at offset k) and store that order in y.
        int p = 0;
        // Suffixes starting at N-k or later have no second half: they go first.
        for(int i = N-k; i < N; i++) y[p++] = i;
        // The others inherit the order of the suffix k positions further on,
        // which the current sa already provides.
        for(int i = 0; i < N; i++) if(sa[i] >= k) y[p++] = sa[i] - k;
        // Counting sort by the first-half rank x[]; walking y backwards keeps
        // the second-half order among equal first halves.
        for(int i = 0; i < m; i++) c[i] = 0;
        for(int i = 0; i < N; i++) c[x[y[i]]]++;
        for(int i = 1; i < m; i++) c[i] += c[i-1];
        for(int i = N-1; i >= 0; i--) sa[--c[x[y[i]]]] = y[i];
        // y now holds the previous ranks; the new ranks are written into x.
        swap(x, y);
        p = 1;
        x[sa[0]] = 0;
        // Neighbours in sa share a rank exactly when both halves had equal
        // ranks in the previous round.
        for(int i = 1; i < N; i++)
            x[sa[i]] = (y[sa[i-1]] == y[sa[i]] && y[sa[i-1]+k] == y[sa[i]+k] ? p-1 :p++);
        // Every suffix has its own rank: the order is final.
        if(p >= N) break;
        // The next round needs one bucket per distinct rank.
        m = p;
    }
}
// rnk[i]    : position of the suffix starting at i inside sa (inverse of sa).
// height[r] : number of leading characters shared by the suffixes sa[r-1]
//             and sa[r]; entry 0 is never written by get_height().
int rnk[maxn*maxm], height[maxn*maxm];

// Fills rnk[] and height[] from sa[]. Suffixes are visited in text order so
// that the match length found for one suffix, minus one, can be reused as the
// starting point for the next.
// NOTE(review): the comparison loop has no bounds check and simply runs until
// the two suffixes differ -- it presumably relies on the '\0' main stores at
// s[N]; confirm before reusing.
void get_height() {
    for(int pos = 0; pos < N; ++pos) rnk[sa[pos]] = pos;

    int common = 0;
    for(int cur = 0; cur < N; ++cur) {
        const int r = rnk[cur];
        // The first suffix in sa has no predecessor to compare against.
        if(r == 0) continue;
        const int prev = sa[r-1];
        if(common > 0) --common;
        while(s[cur+common] == s[prev+common]) ++common;
        height[r] = common;
    }
}

// Number of input strings in the current test case.
int n;
// Input buffer for a single string.
char s2[maxm];
// sign[i] is the index in s of the separator that terminates string i; the
// array is increasing, so find() can binary-search it to map a text position
// to the string it belongs to.
int sign[maxn];
// Longest feasible length found by the binary search in main.
int mlen;
// Start positions in s of the answers of length mlen.
vector<int> A;
// Per-string marker written by find() with the sa index of the latest suffix
// seen from that string.
int flag[maxn];
// Separator characters handed out by gen_sign(), mapped to a serial number.
map<char, int> Map;
// Decides whether some substring of length p occurs in more than half of the
// n input strings.
//
// The suffix array is cut into maximal runs of consecutive entries whose
// neighbouring height values are all >= p; every suffix in a run starts with
// the same p characters. A run qualifies when its suffixes come from more
// than n/2 different input strings.
//
// p : candidate length. Non-positive lengths are rejected outright: main
//     treats "no length accepted" as "no answer" and prints "?".
// A : receives, in suffix-array order, sa[start] for the first entry of
//     every qualifying run (start positions in s of the matching substrings).
//     Entries are appended; the caller clears the vector beforehand.
// Returns true iff at least one run qualified.
bool find(int p, vector<int> &A) { // is the candidate length p feasible?
    if(p <= 0) return false;
    // flag[t] is the sa index of the latest suffix seen from string t.
    // -1 means "never seen"; 0 cannot serve as that marker because it is a
    // valid sa index and would collide with a run starting at index 0.
    memset(flag, -1, sizeof flag);
    bool OK = false;
    int cnt = 0;    // distinct input strings represented in the current run
    int start = 0;  // sa index at which the current run begins
    for(int i = 0; i < N; i++) {
        if(i > 0 && height[i] < p) {
            // sa[i] shares fewer than p characters with its predecessor:
            // the current run ends here and a new one begins.
            if(cnt > n/2) {
                OK = true;
                A.push_back(sa[start]);
            }
            cnt = 0;
            start = i;
        }
        // Index of the input string that contains text position sa[i].
        const int t = static_cast<int>(lower_bound(sign, sign+n, sa[i]) - sign);
        // Count a string once per run: its marker still points before the
        // run. Suffixes that begin with a separator belong to no string.
        if(!Map.count(s[sa[i]]) && flag[t] < start)
            cnt++;
        flag[t] = i;
    }
    // The last run is not followed by a boundary, so check it here.
    if(cnt > n/2) {
        OK = true;
        A.push_back(sa[start]);
    }
    return OK;
}
// Number of separators handed out so far in the current test case.
int cnt;

// Picks the smallest character code, starting from 1, that is neither a
// lowercase letter nor already registered in Map, registers it with the next
// serial number and returns it.
// NOTE(review): if every code below 128 is taken the loop ends with 128 and
// that value is registered and returned as-is -- confirm callers never ask
// for that many separators.
char gen_sign() { // create a separator and remember it
    int code = 1;
    while(code < 128) {
        const bool lowercase = (code >= 'a' && code <= 'z');
        if(!lowercase && Map.count(code) == 0) break;
        ++code;
    }
    Map[code] = ++cnt;
    return code;
}
// Reads test cases until n == 0 (or end of input). For each case it prints,
// in suffix-array (lexicographic) order, every longest substring that occurs
// in more than half of the n strings, or "?" when no such substring exists.
// Consecutive cases are separated by a blank line.
int main() {
    int tt = 0;
    while(scanf("%d", &n) == 1 && n) {
        if(tt++) puts("");
        if(n == 1) {
            // A single string is trivially its own answer.
            // The width 1004 leaves room for the '\0' in s2[maxm] (maxm = 1005).
            if(scanf("%1004s", s2) != 1) break;
            printf("%s\n", s2);
            continue;
        }
        cnt = 0;
        Map.clear();
        N = 0;
        // Concatenate the strings into s, each followed by its own separator.
        for(int i = 0; i < n; i++) {
            // Stop cleanly on truncated input instead of reusing stale data.
            if(scanf("%1004s", s2) != 1) return 0;
            strcpy(s+N, s2);
            N += static_cast<int>(strlen(s2));
            sign[i] = N;
            s[N++] = gen_sign();
        }
        s[N] = '\0';
        // 128 buckets: gen_sign() may hand out any code below 128, and each
        // code needs a counter that build_sa() resets.
        build_sa(128);
        get_height();
        // Binary search for the largest length accepted by find(); B collects
        // the answers for the length being tried, A keeps the best so far.
        mlen = 0;
        int L = 0, R = N-1;
        A.clear();
        vector<int> B;
        while(R >= L) {
            int M = L + (R-L+1)/2;
            B.clear();
            if(find(M, B)) {
                mlen = M;
                A = B;
                L = M+1;
            }
            else R = M-1;
        }

        if(A.empty()) printf("?\n");
        // Each answer is the mlen characters of s starting at A[i].
        for(size_t i = 0; i < A.size(); i++)
            printf("%.*s\n", mlen, s + A[i]);
    }
}

 

posted @ 2016-12-09 19:18  kiraa  阅读(262)  评论(0编辑  收藏  举报