SCOJ 4493: DNA 最长公共子串 后缀自动机

4493: DNA

题目链接:

http://acm.scu.edu.cn/soj/problem.action?id=4493

Description

Deoxyribonucleic acid (DNA) is a molecule that carries most of the genetic instructions used in the development,
functioning and reproduction of all known living organisms and many viruses.
Most DNA molecules consist of two biopolymer strands coiled around each other to form a double helix.
The two DNA strands are known as polynucleotides since they are composed of simpler units called nucleotides.
Each nucleotide is composed of a nitrogen-containing nucleobase—either cytosine (C), guanine (G), adenine (A), or thymine (T)—
as well as a monosaccharide sugar called deoxyribose and a phosphate group. According to base pairing rules (A with T, and C with G),
hydrogen bonds bind the nitrogenous bases of the two separate polynucleotide strands to make double-stranded DNA.
We define the length of a strand as the number of its nucleobases. Given a bunch of different DNA strands, for each strand,
find the length of the longest common pieces between the two complementary strands.

Input

The first line is the number of test cases, T, where 0 < T<=100.
Each line followed represents a DNA strand, whose length is no more than 5000.

Output

For each strand, print a number, indicating the answer illustrated above.

Sample Input

3
A
AT
ATAT

Sample Output

0
1
3

Hint

题意

给你一个DNA序列,求它的互补串和它本身的最长公共子串的长度

题解:

后缀自动机/后缀数组/hash都可以过这道题

都是一个比较裸的题

下面代码是后缀自动机的

代码

#include<bits/stdc++.h>
using namespace std;

// Capacity of the input buffers. The problem statement caps a strand at 5000
// characters; the extra room also covers the terminating NUL written by scanf.
const int maxn = 5010;
// s1: the strand as read from input.
// s2: its base-by-base complement (A<->T, C<->G), filled in by solve().
char s1[maxn], s2[maxn];

/**
 * Suffix automaton over symbol codes 0..25.
 *
 * Usage: call init(), then add() once per symbol of the text, in order.
 * Afterwards, following ch[] transitions from state 0 spells exactly the
 * substrings of the text; a missing transition is stored as -1.
 *
 * States live in a growable vector, so the text length is not limited by a
 * compile-time constant. States are always addressed by index (never by
 * pointer or reference held across newnode()), because the vector may
 * reallocate when it grows.
 */
struct SAM {
    /// Number of distinct symbol codes accepted by add().
    enum { ALPHABET = 26 };

    struct State {
        int len;           ///< length of the longest substring ending in this state
        int f;             ///< suffix link (index of another state); -1 for the root
        int ch[ALPHABET];  ///< outgoing transitions by symbol code; -1 = none

        /// Resets the state to "empty": no length, no link, no transitions.
        void init() {
            len = 0;
            f = -1;
            for (int i = 0; i < ALPHABET; ++i) ch[i] = -1;
        }
    };

    std::vector<State> e;  ///< all states; e[0] is the root
    int idx;               ///< number of states in use (always equals e.size())
    int last;              ///< state that represents the whole text added so far

    /// A freshly constructed automaton is already valid and empty.
    SAM() : idx(0), last(0) { init(); }

    /// Discards every state and starts over with only the root.
    /// The vector's capacity is kept, so re-use across inputs does not reallocate.
    void init() {
        e.clear();
        idx = last = 0;
        newnode();
    }

    /// Appends a blank state and returns its index.
    int newnode() {
        State blank;
        blank.init();
        e.push_back(blank);
        return idx++;
    }

    /**
     * Extends the automaton by one symbol.
     *
     * @param c symbol code in [0, ALPHABET)
     * @throws std::out_of_range if c is outside that range; the automaton is
     *         left untouched in that case.
     */
    void add(int c) {
        if (c < 0 || c >= ALPHABET) {
            throw std::out_of_range("SAM::add: symbol code out of range");
        }

        const int cur = newnode();
        e[cur].len = e[last].len + 1;

        // Walk the suffix links of the previous end state, adding the new
        // transition wherever it is still missing.
        int p = last;
        for (; p != -1 && e[p].ch[c] == -1; p = e[p].f) {
            e[p].ch[c] = cur;
        }

        if (p == -1) {
            // The symbol never occurred before: the only proper suffix state is the root.
            e[cur].f = 0;
        } else {
            const int q = e[p].ch[c];
            if (e[p].len + 1 == e[q].len) {
                // q already represents exactly the suffix we need.
                e[cur].f = q;
            } else {
                // q also holds longer strings, so split off a clone that keeps
                // q's transitions and link but has the shorter length.
                const int clone = newnode();
                e[clone] = e[q];
                e[clone].len = e[p].len + 1;
                e[q].f = e[cur].f = clone;
                // Redirect the transitions that used to reach q via the short path.
                for (; p != -1 && e[p].ch[c] == q; p = e[p].f) {
                    e[p].ch[c] = clone;
                }
            }
        }
        last = cur;
    }
};

SAM sam;
void solve()
{
    scanf("%s",s1);
    int len = strlen(s1);
    for(int i=0;i<len;i++)
    {
        if(s1[i]=='A')s2[i]='T';
        if(s1[i]=='T')s2[i]='A';
        if(s1[i]=='C')s2[i]='G';
        if(s1[i]=='G')s2[i]='C';
    }
    sam.init();
    for(int i=0;i<len;i++)
        sam.add(s1[i]-'A');
    int p = 0,ans = 0,Len = 0;
    for(int i=0;i<len;i++)
    {
        int id = s2[i]-'A';
        if(sam.e[p].ch[id]!=-1)
            Len++,p=sam.e[p].ch[id];
        else
        {
            for(;p!=-1&&sam.e[p].ch[id]==-1;p=sam.e[p].f);
            if(p==-1)Len=0,p=0;
            else
            {
                Len = sam.e[p].len+1;
                p = sam.e[p].ch[id];
            }
        }
        ans = max(ans,Len);
    }
    printf("%d\n",ans);
}
/// Entry point: reads the number of test cases and runs solve() once per case.
int main()
{
    int t = 0;
    // If the case count cannot be parsed there is nothing to process; looping
    // on an unset counter would be undefined behaviour.
    if (scanf("%d", &t) != 1) return 0;
    // "> 0" so that a negative count does nothing instead of looping until wrap-around.
    while (t-- > 0) solve();
    return 0;
}
posted @ 2016-04-18 10:22  qscqesze  阅读(403)  评论(0)  编辑  收藏  举报