后缀数组与后缀自动机

串串


感觉最近一堆坑要填。

后缀数组

后缀数组是一种可以记录后缀字典排序的一个数组,假如问题的答案跟字符串后缀的字典排序有关的话就可以尝试求解出后缀数组。

如何求解呢,考虑以下这个暴力算法:

先比较第 \(1\) 位,然后再比较第 \(2\) 位,以此类推,这样就可以很暴力地比较出每个串的大小。

那么能不能优化呢?上面那个算法没有利用上后缀的性质。如何用上呢?我们发现在比较第二位的时候,某个后缀的第二位其实就是下一个后缀的第一位,而第一位的排序结果已经求出来了,于是比较第二位时可以直接用第一位的答案来做。接着我们可以直接比较前 \(4\) 位,因为答案可以通过前两位的答案拼出来 (\(2 + 2 = 4\))。这样一直倍增下去就只用比较 \(O(\log n)\) 轮,一次 \(\text{sort}\) 的复杂度为 \(O(n \log n)\),总复杂度就是 \(O(n \log ^ 2 n)\) 的。

点击查看代码
//これも運命じゃないか
#include<bits/stdc++.h>
using namespace std;
#define int long long
#define uint unsigned long long
#define double long double
#define Air
// Stream-style wrappers around getchar()/putchar().
namespace io {
    // Input side: every extractor consumes characters from stdin through getchar().
    class In {
        public:
            // Parses an optionally signed decimal number (with an optional fractional part) into x.
            // Leading non-digit characters are skipped; a '-' among them negates the result.
            // The character is kept in an int so that EOF is distinguishable and ends the scan;
            // if EOF comes before any digit, x is left at 0.
            template<typename T>
            inline In &operator>>(T &x) {
                x=0; bool f=0; int c=getchar();
                while(c!=EOF&&(c<'0'||c>'9')) f|=(c=='-'),c=getchar();
                while(c>='0'&&c<='9') x=x*10+c-'0',c=getchar();
                if(c=='.') {
                    c=getchar(); double dot=0.1;
                    while(c>='0'&&c<='9') x+=(c-'0')*dot,dot*=0.1,c=getchar();
                } return (f?x=-x:x),*this;
            }
            // Reads the next non-whitespace character (x receives (char)EOF at end of input).
            inline In &operator>>(char &x) {
                int c=getchar();
                while(c!=EOF&&isspace(c)) c=getchar();
                x=(char)c; return *this;
            }
            // Reads one whitespace-delimited token into the caller-provided buffer and
            // NUL-terminates it. No bound check is possible here: the buffer must be large enough.
            inline In &operator>>(char *x) {
                int c=getchar();
                while(c!=EOF&&isspace(c)) c=getchar();
                while(c!=EOF&&!isspace(c)) *(x++)=(char)c,c=getchar();
                return *x=0,*this;
            }
            // Reads one whitespace-delimited token into x (x is cleared first).
            inline In &operator>>(string &x) {
                int c=getchar(); x.clear();
                while(c!=EOF&&isspace(c)) c=getchar();
                while(c!=EOF&&!isspace(c)) x.push_back((char)c),c=getchar();
                return *this;
            }
            inline In &operator>>(In &in) { return in;}
    };
    // Output side: writes through putchar().
    class Out {
        private:
            char buf[35]; short dot=6,top=0; // digit stack / format buffer, precision, stack height
        public:
            // Prints an integer in decimal: digits are pushed least-significant first, then popped.
            template<typename T>
            inline Out &operator<<(T x) {
                if(x<0) putchar('-'),x=-x;
                do { buf[++top]=x%10,x/=10;} while(x);
                while(top) putchar(buf[top--]|'0'); return *this;
            }
            inline Out &operator<<(char c) {return putchar(c),*this;}
            inline Out &operator<<(string x) {for(auto c:x) putchar(c); return *this;}
            inline Out &operator<<(char *x) {while(*x) putchar(*(x++)); return *this;}
            inline Out &operator<<(const char *x) {while(*x) putchar(*(x++)); return *this;}
            // Prints a floating-point value with `dot` digits after the decimal point.
            // The value is passed as long double with the matching %Lf conversion; decltype(0.0L)
            // spells that type without the token `double`, which this file redefines as a macro.
            inline Out &operator<<(double x) {
                snprintf(buf,sizeof(buf),"%.*Lf",(int)dot,static_cast<decltype(0.0L)>(x));
                return (*this)<<buf;
            }
            inline Out &operator<<(Out &out) {return out;}
            // Sets the number of fractional digits used by the floating-point printer.
            inline Out &setdot(const int n) {return dot=n,*this;}
    };
    In fin;
    Out fout;
    // Sets the precision on the given stream (defaults to fout) and returns that stream.
    inline Out &setdot(const int n,Out& out=fout) {return out.setdot(n),out;}
    // Reads one line into x: skips leading whitespace other than ' ', then copies characters
    // until the next non-space whitespace character or EOF. The buffer must be large enough.
    inline In &getline(char *x,In& in=fin) {
        int c=getchar();
        while(c!=EOF&&c!=' '&&isspace(c)) c=getchar();
        while(c!=EOF&&(c==' '||!isspace(c))) (*x++)=(char)c,c=getchar();
        return *x=0,in;
    }
    // std::string variant of getline above.
    inline In &getline(string &x,In& in=fin) {
        int c=getchar(); x.clear();
        while(c!=EOF&&c!=' '&&isspace(c)) c=getchar();
        while(c!=EOF&&(c==' '||!isspace(c))) x.push_back((char)c),c=getchar();
        return in;
    }
}
using namespace io;
// Convenience wrapper: extracts one integer from the global reader `fin` and returns it.
inline int read(){
    int value;
    fin >> value;
    return value;
}
const int N = 2e6 + 10;
char ch[N];
int n;
int sa[N], rk[N];
int tmp[N];

void solve(){
    for(int i = 1; i <= n; i++){
        rk[i] = ch[i];
        sa[i] = i;  
    }
    for(int len = 1; len <= n; len *= 2){
        sort(sa + 1, sa + 1 + n, [=](int x, int y){
            return (rk[x] != rk[y]) ? (rk[x] < rk[y]) : (rk[x + len] < rk[y + len]);
        });
        for(int i = 1; i <= n; i++){
            tmp[i] = rk[i];
        }
        int tot = 0;
        for(int i = 1; i <= n; i++){
            if(tmp[sa[i]] != tmp[sa[i - 1]] || tmp[sa[i] + len] != tmp[sa[i - 1] + len]){
                tot ++;
            }
            rk[sa[i]] = tot;
        }
    }
}
// Entry point: reads one token into ch[1..], builds the suffix array and prints
// sa[1..n], each followed by a space.
signed main() {
#ifndef Air
    freopen(".in","r",stdin);
    freopen(".out","w",stdout);
#endif
    ios::sync_with_stdio(false);
    cin.tie(0);
    cout.tie(0);
    fin >> (ch + 1); // the text is stored 1-based
    n = strlen(ch + 1);
    solve();
    int idx = 1;
    while(idx <= n){
        cout << sa[idx] << ' ';
        ++idx;
    }
    return 0;
}

但有时这个效率还不够,于是我们把 \(\text{sort}\) 换成基数排序就行了,复杂度就是 \(O(n \log n)\) 的。

点击查看代码
//これも運命じゃないか
#include<bits/stdc++.h>
using namespace std;
#define int long long
#define uint unsigned long long
#define double long double
#define Air
// Stream-style wrappers around getchar()/putchar().
namespace io {
    // Input side: every extractor consumes characters from stdin through getchar().
    class In {
        public:
            // Parses an optionally signed decimal number (with an optional fractional part) into x.
            // Leading non-digit characters are skipped; a '-' among them negates the result.
            // The character is kept in an int so that EOF is distinguishable and ends the scan;
            // if EOF comes before any digit, x is left at 0.
            template<typename T>
            inline In &operator>>(T &x) {
                x=0; bool f=0; int c=getchar();
                while(c!=EOF&&(c<'0'||c>'9')) f|=(c=='-'),c=getchar();
                while(c>='0'&&c<='9') x=x*10+c-'0',c=getchar();
                if(c=='.') {
                    c=getchar(); double dot=0.1;
                    while(c>='0'&&c<='9') x+=(c-'0')*dot,dot*=0.1,c=getchar();
                } return (f?x=-x:x),*this;
            }
            // Reads the next non-whitespace character (x receives (char)EOF at end of input).
            inline In &operator>>(char &x) {
                int c=getchar();
                while(c!=EOF&&isspace(c)) c=getchar();
                x=(char)c; return *this;
            }
            // Reads one whitespace-delimited token into the caller-provided buffer and
            // NUL-terminates it. No bound check is possible here: the buffer must be large enough.
            inline In &operator>>(char *x) {
                int c=getchar();
                while(c!=EOF&&isspace(c)) c=getchar();
                while(c!=EOF&&!isspace(c)) *(x++)=(char)c,c=getchar();
                return *x=0,*this;
            }
            // Reads one whitespace-delimited token into x (x is cleared first).
            inline In &operator>>(string &x) {
                int c=getchar(); x.clear();
                while(c!=EOF&&isspace(c)) c=getchar();
                while(c!=EOF&&!isspace(c)) x.push_back((char)c),c=getchar();
                return *this;
            }
            inline In &operator>>(In &in) { return in;}
    };
    // Output side: writes through putchar().
    class Out {
        private:
            char buf[35]; short dot=6,top=0; // digit stack / format buffer, precision, stack height
        public:
            // Prints an integer in decimal: digits are pushed least-significant first, then popped.
            template<typename T>
            inline Out &operator<<(T x) {
                if(x<0) putchar('-'),x=-x;
                do { buf[++top]=x%10,x/=10;} while(x);
                while(top) putchar(buf[top--]|'0'); return *this;
            }
            inline Out &operator<<(char c) {return putchar(c),*this;}
            inline Out &operator<<(string x) {for(auto c:x) putchar(c); return *this;}
            inline Out &operator<<(char *x) {while(*x) putchar(*(x++)); return *this;}
            inline Out &operator<<(const char *x) {while(*x) putchar(*(x++)); return *this;}
            // Prints a floating-point value with `dot` digits after the decimal point.
            // The value is passed as long double with the matching %Lf conversion; decltype(0.0L)
            // spells that type without the token `double`, which this file redefines as a macro.
            inline Out &operator<<(double x) {
                snprintf(buf,sizeof(buf),"%.*Lf",(int)dot,static_cast<decltype(0.0L)>(x));
                return (*this)<<buf;
            }
            inline Out &operator<<(Out &out) {return out;}
            // Sets the number of fractional digits used by the floating-point printer.
            inline Out &setdot(const int n) {return dot=n,*this;}
    };
    In fin;
    Out fout;
    // Sets the precision on the given stream (defaults to fout) and returns that stream.
    inline Out &setdot(const int n,Out& out=fout) {return out.setdot(n),out;}
    // Reads one line into x: skips leading whitespace other than ' ', then copies characters
    // until the next non-space whitespace character or EOF. The buffer must be large enough.
    inline In &getline(char *x,In& in=fin) {
        int c=getchar();
        while(c!=EOF&&c!=' '&&isspace(c)) c=getchar();
        while(c!=EOF&&(c==' '||!isspace(c))) (*x++)=(char)c,c=getchar();
        return *x=0,in;
    }
    // std::string variant of getline above.
    inline In &getline(string &x,In& in=fin) {
        int c=getchar(); x.clear();
        while(c!=EOF&&c!=' '&&isspace(c)) c=getchar();
        while(c!=EOF&&(c==' '||!isspace(c))) x.push_back((char)c),c=getchar();
        return in;
    }
}
using namespace io;
// Convenience wrapper: extracts one integer from the global reader `fin` and returns it.
inline int read(){
    int value;
    fin >> value;
    return value;
}
int n;
const int N = 2e6 + 10;
char ch[N];
int a[N];
int sa[N], rk[N];
int hg[N];
int tmp[N];
int cnt[N];
void get_sa(){
    for(int i = 1; i <= n; i++){
        sa[i] = i;
        rk[i] = ch[i];
        cnt[rk[i]] ++;
    }
    int m = 128;
    for(int i = 1; i <= m; i++){
        cnt[i] += cnt[i - 1];
    }
    for(int i = n; i >= 1; i--){
        sa[cnt[rk[i]]--] = i;
    }
    for(int len = 1;; len <<= 1){
        int now = 0;
        for(int i = n - len + 1; i <= n; i++){
            tmp[++now] = i;
        }
        for(int i = 1; i <= n; i++){
            if(sa[i] > len)
            tmp[++now] = sa[i] - len;
        }
        memset(cnt, 0, sizeof cnt);
        for(int i = 1; i <= n; i++) cnt[rk[i]] ++;
        for(int i = 1; i <= m; i++) cnt[i] += cnt[i - 1]; 
        for(int i = n; i >= 1; i--) sa[cnt[rk[tmp[i]]]--] = tmp[i];
        for(int i = 1; i <= n; i++) tmp[i] = rk[i];
        int tot = 0;
        for(int i = 1; i <= n; i++){
            if(tmp[sa[i]] != tmp[sa[i - 1]] || tmp[sa[i] + len] != tmp[sa[i - 1] + len]){
                tot ++;
            }
            rk[sa[i]] = tot;
        }
        if(tot == n) break;
        m = tot;
    }
}
// Fills hg[r] with the length of the longest common prefix of the suffixes ranked r - 1 and r
// (hg[1] = 0), walking the suffixes in text order so the match length drops by at most one
// per step. Requires sa and rk to be built.
void get_hg(){
    int now = 0;
    // Starts at suffix 1: starting at 2 would leave hg[rk[1]] unset.
    for(int i = 1; i <= n; i++){
        if(rk[i] == 1){
            // The smallest suffix has no predecessor in sorted order.
            hg[1] = 0;
            now = 0;
            continue;
        }
        int j = sa[rk[i] - 1];
        if(now) now --;
        while(j + now <= n && i + now <= n && ch[j + now] == ch[i + now]){
            now++;
        }
        hg[rk[i]] = now;
    }
}
int fa[N];
// Per-set summary: x1/x2 are the largest and second-largest values, y1/y2 the smallest and
// second-smallest values.
struct Data{
    int x1, x2, y1, y2;// max, second max, min, second min
    // Combines two summaries into the summary of their union.
    friend Data operator + (Data x, Data y){
        Data out = x;
        // Two largest: y.x1 either becomes the new maximum or competes for second place.
        if(y.x1 > out.x1){
            out.x2 = out.x1;
            out.x1 = y.x1;
        }
        else{
            out.x2 = (y.x1 > out.x2) ? y.x1 : out.x2;
        }
        out.x2 = (y.x2 > out.x2) ? y.x2 : out.x2;
        // Two smallest: mirror image of the above.
        if(y.y1 < out.y1){
            out.y2 = out.y1;
            out.y1 = y.y1;
        }
        else{
            out.y2 = (y.y1 < out.y2) ? y.y1 : out.y2;
        }
        out.y2 = (y.y2 < out.y2) ? y.y2 : out.y2;
        return out;
    }
}dat[N];
// Disjoint-set find with full path compression: returns the root of x's set and points every
// node on the path from x directly at that root.
int get(int x){
    int root = x;
    while(fa[root] != root) root = fa[root];
    while(fa[x] != root){
        int up = fa[x];
        fa[x] = root;
        x = up;
    }
    return root;
}
void merge(int x, int y){
    int fx = get(x), fy = get(y);
    if(fx == fy) return ;
    dat[fx] = dat[fx] + dat[fy];
}
// Entry point: reads one token into ch[1..], builds the suffix array, prints sa[1..n]
// (each followed by a space) and then computes the height array.
signed main() {
#ifndef Air
    freopen(".in","r",stdin);
    freopen(".out","w",stdout);
#endif
    ios::sync_with_stdio(false);
    cin.tie(0);
    cout.tie(0);
    fin >> (ch + 1); // the text is stored 1-based
    n = strlen(ch + 1);
    get_sa();
    int idx = 1;
    while(idx <= n){
        cout << sa[idx] << ' ';
        ++idx;
    }
    get_hg();

    return 0;
}

假如现在我们还要求解排完序后相邻两个后缀串的 \(\text{LCP}\),记作 \(hg_i\),该如何求呢?

我们考虑一个定理就是:

\(\text{LCP}(i, sa_{rk_i - 1}) \ge \text{LCP}(i - 1, sa_{rk_{i - 1} - 1}) - 1\)

其中 \(sa\) 表示第 \(i\) 小的后缀的起始点, \(rk\) 表示起始点为 \(i\) 的排名。

考虑如何证明呢:

这里不妨假设 \(\text{LCP}(i - 1, sa_{rk_{i - 1} - 1}) \ge 1\)

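我们记后缀 \(i - 1 = a + A + D\),后缀 \(sa_{rk_{i - 1} - 1} = a + A + B\),其中小写字母为字符,大写字母为字符串(且 \(B < D\)),那么后缀 \(i = A + D\),且后缀 \(sa_{rk_{i - 1} - 1} + 1 = A + B\)。由于 \(sa_{rk_i - 1}\) 是排名恰好在后缀 \(i\) 前一位的后缀,也就是字典序小于后缀 \(i\) 的后缀中最大的那个,那么就有 \(A + D > sa_{rk_i - 1} \ge A + B\)。夹在两者之间的后缀 \(sa_{rk_i - 1}\) 必然也以 \(A\) 为前缀,所以 \(A\) 是后缀 \(i\) 与后缀 \(sa_{rk_i - 1}\) 的公共前缀,即 \(\text{LCP}(i, sa_{rk_i - 1}) \ge |A| = \text{LCP}(i - 1, sa_{rk_{i - 1} - 1}) - 1\),证毕。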

那么有了上面那个后就可以暴力做了,复杂度就是对的。

点击查看代码
//これも運命じゃないか
#include<bits/stdc++.h>
using namespace std;
#define int long long
#define uint unsigned long long
#define double long double
#define Air
// Minimal getchar()/putchar() based integer I/O helpers.
namespace io{
    // Reads one optionally signed decimal integer from stdin. Non-digit characters before the
    // number are skipped, each '-' among them flipping the sign. The character is held in an
    // int so EOF can be detected; at EOF the scan stops and the value read so far (0 if none)
    // is returned instead of looping forever.
    inline int read(){
        int f = 1, t = 0; int ch = getchar();
        while(ch != EOF && (ch < '0' || ch > '9')){if(ch == '-') f = -f; ch = getchar();}
        while(ch >= '0' && ch <= '9'){t = t * 10 + ch - '0'; ch = getchar();}
        return t * f;
    }
    // Writes x in decimal to stdout, most significant digit first (recursive).
    inline void write(int x){
        if(x < 0){putchar('-'); x = -x;}
        if(x >= 10){write(x / 10);}
        putchar(x % 10 + '0');
    }
}
using namespace io;
string ch;
const int N = 2e6 + 10;
int sa[N];
int n;
int rk[N];
int tmp[N];
void solve(){
    for(int i = 1; i <= n; i++){
        rk[i] = ch[i];
        sa[i] = i;
    }
    for(int len = 1; len <= n; len *= 2){
        sort(sa + 1, sa + 1 + n, [&](int x, int y){
            return (rk[x] != rk[y]) ? (rk[x] < rk[y]) : (rk[x + len] < rk[y + len]);
        });
        for(int i = 1; i <= n; i++){
            tmp[i] = rk[i];
        }
        int tot = 0;
        for(int i = 1; i <= n; i++){
            if(tmp[sa[i]] != tmp[sa[i - 1]] || tmp[sa[i] + len] != tmp[sa[i - 1] + len]){
                tot ++;
            }
            rk[sa[i]] = tot;
        }
    }
}
int hig[N];

// Computes hig[r]: the number of leading characters shared by the suffix of rank r and the
// suffix sa[r - 1], visiting suffixes in text order and carrying over the previous match
// length minus one.
void get_hig(){
    int matched = 0;
    for(int pos = 1; pos <= n; pos++){
        const int prev = sa[rk[pos] - 1];
        if(matched > 0) --matched;
        // Extend the match character by character while both suffixes still have characters.
        for(; pos + matched <= n && prev + matched <= n; ++matched){
            if(ch[pos + matched] != ch[prev + matched]) break;
        }
        hig[rk[pos]] = matched;
    }
}
// Entry point: reads a string, prints the suffix array as 0-based start positions on one
// line and the height array hig[1..n] on the next.
signed main() {
#ifndef Air
    freopen("test.in","r",stdin);
    freopen("test.out","w",stdout);
#endif
    ios::sync_with_stdio(false);
    cin.tie(0);
    cout.tie(0);
    cin >> ch;
    ch = ' ' + ch; // shift to 1-based indexing
    n = ch.size() - 1;
    solve();
    int idx = 1;
    while(idx <= n){
        cout << sa[idx] - 1 << ' ';
        ++idx;
    }
    cout << '\n';
    get_hig();
    idx = 1;
    while(idx <= n){
        cout << hig[idx] << ' ';
        ++idx;
    }
    cout << '\n';
    return 0;
}

那么接下来考虑一个问题,就是任意两个后缀的 \(\text{LCP}\) 如何去求呢:

答案就是它们所对应的 \(rk\) 区间内 \(hg_i\) 的最小值(设 \(rk_x < rk_y\),即 \(\min_{rk_x < i \le rk_y} hg_i\))。为什么呢?考虑如果区间内的 \(hg\) 一直大于等于某个数,那么两个后缀的公共前缀也肯定大于等于那个数;而如果 \(hg\) 减小了,由于字典序是单调递增的,所以肯定不会再加回来,所以答案又小于等于这个数,结合一下就是等于。

P2178

直接反着做成合并就行了。

点击查看代码
//これも運命じゃないか
//跑出来sa之后
//问题就转变为一段连续区间每次分裂区间
//问区间两数乘积最大值
//可以反着做然后并查集之
#include<bits/stdc++.h>
using namespace std;
#define int long long
#define uint unsigned long long
#define double long double
#define Air
// Stream-style wrappers around getchar()/putchar().
namespace io {
    // Input side: every extractor consumes characters from stdin through getchar().
    class In {
        public:
            // Parses an optionally signed decimal number (with an optional fractional part) into x.
            // Leading non-digit characters are skipped; a '-' among them negates the result.
            // The character is kept in an int so that EOF is distinguishable and ends the scan;
            // if EOF comes before any digit, x is left at 0.
            template<typename T>
            inline In &operator>>(T &x) {
                x=0; bool f=0; int c=getchar();
                while(c!=EOF&&(c<'0'||c>'9')) f|=(c=='-'),c=getchar();
                while(c>='0'&&c<='9') x=x*10+c-'0',c=getchar();
                if(c=='.') {
                    c=getchar(); double dot=0.1;
                    while(c>='0'&&c<='9') x+=(c-'0')*dot,dot*=0.1,c=getchar();
                } return (f?x=-x:x),*this;
            }
            // Reads the next non-whitespace character (x receives (char)EOF at end of input).
            inline In &operator>>(char &x) {
                int c=getchar();
                while(c!=EOF&&isspace(c)) c=getchar();
                x=(char)c; return *this;
            }
            // Reads one whitespace-delimited token into the caller-provided buffer and
            // NUL-terminates it. No bound check is possible here: the buffer must be large enough.
            inline In &operator>>(char *x) {
                int c=getchar();
                while(c!=EOF&&isspace(c)) c=getchar();
                while(c!=EOF&&!isspace(c)) *(x++)=(char)c,c=getchar();
                return *x=0,*this;
            }
            // Reads one whitespace-delimited token into x (x is cleared first).
            inline In &operator>>(string &x) {
                int c=getchar(); x.clear();
                while(c!=EOF&&isspace(c)) c=getchar();
                while(c!=EOF&&!isspace(c)) x.push_back((char)c),c=getchar();
                return *this;
            }
            inline In &operator>>(In &in) { return in;}
    };
    // Output side: writes through putchar().
    class Out {
        private:
            char buf[35]; short dot=6,top=0; // digit stack / format buffer, precision, stack height
        public:
            // Prints an integer in decimal: digits are pushed least-significant first, then popped.
            template<typename T>
            inline Out &operator<<(T x) {
                if(x<0) putchar('-'),x=-x;
                do { buf[++top]=x%10,x/=10;} while(x);
                while(top) putchar(buf[top--]|'0'); return *this;
            }
            inline Out &operator<<(char c) {return putchar(c),*this;}
            inline Out &operator<<(string x) {for(auto c:x) putchar(c); return *this;}
            inline Out &operator<<(char *x) {while(*x) putchar(*(x++)); return *this;}
            inline Out &operator<<(const char *x) {while(*x) putchar(*(x++)); return *this;}
            // Prints a floating-point value with `dot` digits after the decimal point.
            // The value is passed as long double with the matching %Lf conversion; decltype(0.0L)
            // spells that type without the token `double`, which this file redefines as a macro.
            inline Out &operator<<(double x) {
                snprintf(buf,sizeof(buf),"%.*Lf",(int)dot,static_cast<decltype(0.0L)>(x));
                return (*this)<<buf;
            }
            inline Out &operator<<(Out &out) {return out;}
            // Sets the number of fractional digits used by the floating-point printer.
            inline Out &setdot(const int n) {return dot=n,*this;}
    };
    In fin;
    Out fout;
    // Sets the precision on the given stream (defaults to fout) and returns that stream.
    inline Out &setdot(const int n,Out& out=fout) {return out.setdot(n),out;}
    // Reads one line into x: skips leading whitespace other than ' ', then copies characters
    // until the next non-space whitespace character or EOF. The buffer must be large enough.
    inline In &getline(char *x,In& in=fin) {
        int c=getchar();
        while(c!=EOF&&c!=' '&&isspace(c)) c=getchar();
        while(c!=EOF&&(c==' '||!isspace(c))) (*x++)=(char)c,c=getchar();
        return *x=0,in;
    }
    // std::string variant of getline above.
    inline In &getline(string &x,In& in=fin) {
        int c=getchar(); x.clear();
        while(c!=EOF&&c!=' '&&isspace(c)) c=getchar();
        while(c!=EOF&&(c==' '||!isspace(c))) x.push_back((char)c),c=getchar();
        return in;
    }
}
using namespace io;
// Convenience wrapper: extracts one integer from the global reader `fin` and returns it.
inline int read(){
    int value;
    fin >> value;
    return value;
}
int n;
const int N = 2e6 + 10, INF = 1e9;
char ch[N];
int a[N];
int sa[N], rk[N];
int hg[N];
int tmp[N];
int cnt[N];
void get_sa(){
    for(int i = 1; i <= n; i++){
        sa[i] = i;
        rk[i] = ch[i];
        cnt[rk[i]] ++;
    }
    int m = 128;
    for(int i = 1; i <= m; i++){
        cnt[i] += cnt[i - 1];
    }
    for(int i = n; i >= 1; i--){
        sa[cnt[rk[i]]--] = i;
    }
    for(int len = 1;; len <<= 1){
        int now = 0;
        for(int i = n - len + 1; i <= n; i++){
            tmp[++now] = i;
        }
        for(int i = 1; i <= n; i++){
            if(sa[i] > len)
            tmp[++now] = sa[i] - len;
        }
        memset(cnt, 0, sizeof cnt);
        for(int i = 1; i <= n; i++) cnt[rk[i]] ++;
        for(int i = 1; i <= m; i++) cnt[i] += cnt[i - 1]; 
        for(int i = n; i >= 1; i--) sa[cnt[rk[tmp[i]]]--] = tmp[i];
        for(int i = 1; i <= n; i++) tmp[i] = rk[i];
        int tot = 0;
        for(int i = 1; i <= n; i++){
            if(tmp[sa[i]] != tmp[sa[i - 1]] || tmp[sa[i] + len] != tmp[sa[i - 1] + len]){
                tot ++;
            }
            rk[sa[i]] = tot;
        }
        if(tot == n) break;
        m = tot;
    }
}
void get_hg(){
    int now = 0;
    for(int i = 1; i <= n; i++){
        int j = sa[rk[i] - 1];
        if(now) now --;
        while(j + now <= n && i + now <= n ){
            if(ch[j + now] == ch[i + now]) now++;
            else break;
        }
        hg[rk[i]] = now;
    }
}
int fa[N];
int siz[N];
// Per-set summary: x1/x2 are the largest and second-largest values, y1/y2 the smallest and
// second-smallest values.
struct Data{
    int x1, x2, y1, y2;// max, second max, min, second min
    // Combines two summaries into the summary of their union.
    friend Data operator + (Data x, Data y){
        Data out = x;
        // Two largest: y.x1 either becomes the new maximum or competes for second place.
        if(y.x1 > out.x1){
            out.x2 = out.x1;
            out.x1 = y.x1;
        }
        else{
            out.x2 = (y.x1 > out.x2) ? y.x1 : out.x2;
        }
        out.x2 = (y.x2 > out.x2) ? y.x2 : out.x2;
        // Two smallest: mirror image of the above.
        if(y.y1 < out.y1){
            out.y2 = out.y1;
            out.y1 = y.y1;
        }
        else{
            out.y2 = (y.y1 < out.y2) ? y.y1 : out.y2;
        }
        out.y2 = (y.y2 < out.y2) ? y.y2 : out.y2;
        return out;
    }
}dat[N];
// Disjoint-set find with full path compression: returns the root of x's set and points every
// node on the path from x directly at that root.
int get(int x){
    int root = x;
    while(fa[root] != root) root = fa[root];
    while(fa[x] != root){
        int up = fa[x];
        fa[x] = root;
        x = up;
    }
    return root;
}
int ans[N];
int now = -1e18;
int res[N];
int ot = 0;

// Unites the sets of x and y (no-op if already united). Updates the merged summary, adds
// siz[a] * siz[b] new pairs to `ot`, and raises `now` to the best product of the two largest
// or the two smallest values of the merged set.
void merge(int x, int y){
    const int rx = get(x);
    const int ry = get(y);
    if(rx != ry){
        dat[rx] = dat[rx] + dat[ry];
        fa[ry] = rx;
        ot += siz[rx] * siz[ry];
        siz[rx] += siz[ry];
        const int bestHigh = dat[rx].x1 * dat[rx].x2;
        const int bestLow = dat[rx].y1 * dat[rx].y2;
        now = max(now, bestHigh);
        now = max(now, bestLow);
    }
}
vector<int>e[N];
// Processes threshold p: joins rank y - 1 with rank y for every y bucketed under p, then
// records the running maximum product and pair count for p.
void get_ans(int p){
    const vector<int> &bucket = e[p];
    for(size_t k = 0; k < bucket.size(); ++k){
        merge(bucket[k] - 1, bucket[k]);
    }
    res[p] = ot;
    ans[p] = now;
}
// Entry point for P2178: reads the string and the per-position values, builds the suffix and
// height arrays, then sweeps the LCP threshold from n down to 0, merging adjacent ranks whose
// height equals the threshold, and finally prints the pair count and best product for each
// threshold 0..n-1.
signed main() {
#ifndef Air
    freopen(".in","r",stdin);
    freopen(".out","w",stdout);
#endif
    ios::sync_with_stdio(false);
    cin.tie(0);
    cout.tie(0);
    n = read();
    fin >> (ch + 1);
    n = strlen(ch + 1); // the length actually read takes precedence
    for(int pos = 1; pos <= n; pos++) a[pos] = read();
    get_sa();
    get_hg();
    // One singleton set per rank; the "second" slots start at sentinels.
    for(int r = 1; r <= n; r++){
        fa[r] = r;
        siz[r] = 1;
        dat[r] = {a[sa[r]], -INF, a[sa[r]], INF};
    }
    // Bucket every adjacent-rank boundary by its height.
    for(int r = 2; r <= n; r++) e[hg[r]].push_back(r);
    for(int h = n; h >= 0; h--) get_ans(h);
    for(int h = 0; h <= n - 1; h++){
        if(ans[h] == -1e18) ans[h] = 0; // no pair reached this threshold
        cout << res[h] << ' ' << ans[h] << '\n';
    }
    return 0;
}

接下来我们再考虑一个问题,如何求本质不同的子串个数。我们可以用全部子串的个数减去重复的个数,那么如何计算对于每个起始点(按排名从小到大考虑),已经被计算过的子串有多少呢?发现答案就是 \(hg_i\):因为每个后缀与排名在它之前的后缀的最长公共前缀,肯定是在排名最近的那个后缀处取到的(与更靠前的后缀的 \(\text{LCP}\) 是一段 \(hg\) 取 \(\min\),不会更大),所以重复的前缀个数就是 \(hg_i\),那么本质不同的子串个数就是 \(\frac{n (n + 1)}{2} - \sum hg_i\)。

P4070

我们考虑从后面插入的话,每次都会在每个后缀后面插入一个,\(hg\) 是跟长度有关的,不好,于是我们考虑反向从前面插入,然后就做完了。

点击查看代码
//これも運命じゃないか
#include<bits/stdc++.h>
using namespace std;
#define int long long
#define uint unsigned long long
#define double long double
#define Air
// Minimal getchar()/putchar() based integer I/O helpers.
namespace io{
    // Reads one optionally signed decimal integer from stdin. Non-digit characters before the
    // number are skipped, each '-' among them flipping the sign. The character is held in an
    // int so EOF can be detected; at EOF the scan stops and the value read so far (0 if none)
    // is returned instead of looping forever.
    inline int read(){
        int f = 1, t = 0; int ch = getchar();
        while(ch != EOF && (ch < '0' || ch > '9')){if(ch == '-') f = -f; ch = getchar();}
        while(ch >= '0' && ch <= '9'){t = t * 10 + ch - '0'; ch = getchar();}
        return t * f;
    }
    // Writes x in decimal to stdout, most significant digit first (recursive).
    inline void write(int x){
        if(x < 0){putchar('-'); x = -x;}
        if(x >= 10){write(x / 10);}
        putchar(x % 10 + '0');
    }
}
using namespace io;
int n;
const int N = 2e5 + 10;
int sa[N], rk[N];
int hg[N];
int tmp[N];
int a[N];
void get_sa(){
    for(int i = 1; i <= n; i++){
        sa[i] = i;
        rk[i] = a[i];
    }
    for(int len = 1; len <= n; len <<= 1){
        sort(sa + 1, sa + 1 + n, [&](int x, int y){
            return (rk[x] != rk[y]) ? (rk[x] < rk[y]) : (rk[x + len] < rk[y + len]);
        });
        for(int i = 1; i <= n; i++){
            tmp[i] = rk[i];
        }
        int tot = 0;
        for(int i = 1; i <= n; i++){
            if(tmp[sa[i]] != tmp[sa[i - 1]] || tmp[sa[i] + len] != tmp[sa[i - 1] + len]){
                tot ++;
            }
            rk[sa[i]] = tot;
        }
    }
}
// Computes hg[r]: the number of leading elements shared by the suffix of rank r and the
// suffix sa[r - 1], visiting suffixes in sequence order and carrying over the previous match
// length minus one.
void get_hg(){
    int matched = 0;
    for(int pos = 1; pos <= n; pos++){
        const int prev = sa[rk[pos] - 1];
        if(matched > 0) --matched;
        // Extend the match element by element while both suffixes still have elements.
        for(; pos + matched <= n && prev + matched <= n; ++matched){
            if(a[pos + matched] != a[prev + matched]) break;
        }
        hg[rk[pos]] = matched;
    }
}
int st[N][30];
int pre[N];
// Fills pre[len] with floor(log2(len)) for len in [2, n]; pre[0] and pre[1] are 0.
void prework(){
    pre[0] = 0;
    pre[1] = 0;
    for(int len = 2; len <= n; ++len){
        pre[len] = 1 + pre[len / 2];
    }
}
// Range-minimum query over st on [l, r] (requires l <= r): the minimum of two overlapping
// power-of-two blocks that together cover the range.
int ask(int l, int r){
    const int k = pre[r - l + 1];
    const int head = st[l][k];
    const int tail = st[r - (1ll << k) + 1][k];
    return min(head, tail);
}
set<int>se;
// Entry point for P4070. Reads n values and stores them reversed in a[1..n], so that
// appending to the original sequence corresponds to activating one more suffix from the
// back of a. After each activation it prints id * (id + 1) / 2 - tp, where id is the number
// of active suffixes and tp is the sum of LCPs between rank-adjacent active suffixes.
signed main() {
#ifndef Air
    freopen(".in","r",stdin);
    freopen(".out","w",stdout);
#endif
    ios::sync_with_stdio(false);cin.tie(0);cout.tie(0);
    n = read();
    for(int i = 1; i <= n; i++){
        a[n - i + 1] = read(); // store the input back to front
    }
    prework();
    get_sa();
    get_hg();
    // Sparse table over hg for range-minimum (LCP) queries.
    for(int i = 1; i <= n; i++){
        st[i][0] = hg[i];
    }
    for(int j = 1; j <= 20; j++){
        for(int i = 1; i + (1ll << j) - 1 <= n; i++){
            st[i][j] = min(st[i][j - 1], st[i + (1ll << (j - 1))][j - 1]);
        }
    }
    int tp = 0; // sum of LCPs between neighbouring ranks in `se`
    for(int i = n; i >= 1; i--){
        int nrk = rk[i];
        auto nt = se.lower_bound(nrk);
        int l = 0, r = 0; // ranks of the active neighbours below/above nrk; 0 means none
        if(se.size() && (*se.rbegin()) > nrk){
            r = (*nt); // some active rank is larger, so nt is valid
        }
        if(se.size() && nt != se.begin()){
            nt --;
            l = (*nt);
        }
        // l and r stop being neighbours once nrk is inserted between them.
        if(l && r){
            tp -= ask(l + 1, r);
        }
        if(l){
            tp += ask(l + 1, nrk);
        }
        if(r){
            tp += ask(nrk + 1, r);
        }
        int id = (n - i +  1); // number of active suffixes after this insertion
        se.insert(nrk);
        cout << id * (id + 1) / 2 - tp << '\n';
    }
    return 0;
}

那么假如现在我们要比较固定子串的大小,也可以尝试使用后缀数组,比如:

P6095

我们发现每段的长度是固定的要么 \(\lceil \frac{n}{k} \rceil\) 要么 \(\lfloor \frac{n}{k} \rfloor\),然后我们考虑二分答案,那么就相当于比较定长的两个子串,然后我们发现非子串肯定没有用,然后我们就可以建出来后缀数组然后直接做就行了。

点击查看代码
//これも運命じゃないか
#include<bits/stdc++.h>
using namespace std;
#define int long long
#define uint unsigned long long
#define double long double
#define Air
// Stream-style wrappers around getchar()/putchar().
namespace io {
    // Input side: every extractor consumes characters from stdin through getchar().
    class In {
        public:
            // Parses an optionally signed decimal number (with an optional fractional part) into x.
            // Leading non-digit characters are skipped; a '-' among them negates the result.
            // The character is kept in an int so that EOF is distinguishable and ends the scan;
            // if EOF comes before any digit, x is left at 0.
            template<typename T>
            inline In &operator>>(T &x) {
                x=0; bool f=0; int c=getchar();
                while(c!=EOF&&(c<'0'||c>'9')) f|=(c=='-'),c=getchar();
                while(c>='0'&&c<='9') x=x*10+c-'0',c=getchar();
                if(c=='.') {
                    c=getchar(); double dot=0.1;
                    while(c>='0'&&c<='9') x+=(c-'0')*dot,dot*=0.1,c=getchar();
                } return (f?x=-x:x),*this;
            }
            // Reads the next non-whitespace character (x receives (char)EOF at end of input).
            inline In &operator>>(char &x) {
                int c=getchar();
                while(c!=EOF&&isspace(c)) c=getchar();
                x=(char)c; return *this;
            }
            // Reads one whitespace-delimited token into the caller-provided buffer and
            // NUL-terminates it. No bound check is possible here: the buffer must be large enough.
            inline In &operator>>(char *x) {
                int c=getchar();
                while(c!=EOF&&isspace(c)) c=getchar();
                while(c!=EOF&&!isspace(c)) *(x++)=(char)c,c=getchar();
                return *x=0,*this;
            }
            // Reads one whitespace-delimited token into x (x is cleared first).
            inline In &operator>>(string &x) {
                int c=getchar(); x.clear();
                while(c!=EOF&&isspace(c)) c=getchar();
                while(c!=EOF&&!isspace(c)) x.push_back((char)c),c=getchar();
                return *this;
            }
            inline In &operator>>(In &in) { return in;}
    };
    // Output side: writes through putchar().
    class Out {
        private:
            char buf[35]; short dot=6,top=0; // digit stack / format buffer, precision, stack height
        public:
            // Prints an integer in decimal: digits are pushed least-significant first, then popped.
            template<typename T>
            inline Out &operator<<(T x) {
                if(x<0) putchar('-'),x=-x;
                do { buf[++top]=x%10,x/=10;} while(x);
                while(top) putchar(buf[top--]|'0'); return *this;
            }
            inline Out &operator<<(char c) {return putchar(c),*this;}
            inline Out &operator<<(string x) {for(auto c:x) putchar(c); return *this;}
            inline Out &operator<<(char *x) {while(*x) putchar(*(x++)); return *this;}
            inline Out &operator<<(const char *x) {while(*x) putchar(*(x++)); return *this;}
            // Prints a floating-point value with `dot` digits after the decimal point.
            // The value is passed as long double with the matching %Lf conversion; decltype(0.0L)
            // spells that type without the token `double`, which this file redefines as a macro.
            inline Out &operator<<(double x) {
                snprintf(buf,sizeof(buf),"%.*Lf",(int)dot,static_cast<decltype(0.0L)>(x));
                return (*this)<<buf;
            }
            inline Out &operator<<(Out &out) {return out;}
            // Sets the number of fractional digits used by the floating-point printer.
            inline Out &setdot(const int n) {return dot=n,*this;}
    };
    In fin;
    Out fout;
    // Sets the precision on the given stream (defaults to fout) and returns that stream.
    inline Out &setdot(const int n,Out& out=fout) {return out.setdot(n),out;}
    // Reads one line into x: skips leading whitespace other than ' ', then copies characters
    // until the next non-space whitespace character or EOF. The buffer must be large enough.
    inline In &getline(char *x,In& in=fin) {
        int c=getchar();
        while(c!=EOF&&c!=' '&&isspace(c)) c=getchar();
        while(c!=EOF&&(c==' '||!isspace(c))) (*x++)=(char)c,c=getchar();
        return *x=0,in;
    }
    // std::string variant of getline above.
    inline In &getline(string &x,In& in=fin) {
        int c=getchar(); x.clear();
        while(c!=EOF&&c!=' '&&isspace(c)) c=getchar();
        while(c!=EOF&&(c==' '||!isspace(c))) x.push_back((char)c),c=getchar();
        return in;
    }
}
using namespace io;
// Convenience wrapper: extracts one integer from the global reader `fin` and returns it.
inline int read(){
    int value;
    fin >> value;
    return value;
}
int n, m;
const int N = 8e5 + 10;
char ch[N];
int sa[N], rk[N];
int tmp[N];
int t;
void get_sa(){
    for(int i = 1; i <= n; i++){
        sa[i] = i;
        rk[i] = ch[i];
    }
    for(int len = 1; len <= n; len <<= 1){
        sort(sa + 1, sa + 1 + n, [&](int x, int y){
            return (rk[x] != rk[y]) ? (rk[x] < rk[y]) : (rk[x + len] < rk[y + len]);
        });
        for(int i = 1; i <= n; i++){
            tmp[i] = rk[i];
        }
        int tot = 0;
        for(int i = 1; i <= n; i++){
            if(tmp[sa[i]] != tmp[sa[i - 1]] || tmp[sa[i] + len] != tmp[sa[i - 1] + len]){
                tot ++;
            }
            rk[sa[i]] = tot;
        }
    }
}
// Feasibility test for rank limit `mid`: for every start offset 1..t, greedily takes m
// pieces around the ring (n is the doubled length, so the ring has n / 2 positions). A piece
// gets length t when the suffix at its start has rank <= mid, otherwise t - 1. Returns true
// if some start offset covers at least n / 2 characters.
bool check(int mid){
    const int ring = n / 2;
    for(int start = 1; start <= t; start++){
        int cur = start;
        int covered = 0;
        for(int piece = 0; piece < m; piece++){
            covered += (rk[cur] <= mid) ? t : t - 1;
            cur = (start + covered - 1) % ring + 1; // wrap back into 1..ring
        }
        if(covered >= ring) return true;
    }
    return false;
}
// Entry point for P6095: reads n, m and the string, doubles the string to handle the ring,
// builds the suffix array, binary-searches the smallest rank for which check() succeeds and
// prints the first t characters of the suffix with that rank.
signed main() {
#ifndef Air
    freopen(".in","r",stdin);
    freopen(".out","w",stdout);
#endif
    ios::sync_with_stdio(false);cin.tie(0);cout.tie(0);
    n = read();
    m = read();
    t = (n - 1) / m + 1; // ceil(n / m): length of the longer pieces
    fin >> (ch + 1);
    // Append a copy of the string so that every rotation is a substring.
    for(int i = 1; i <= n; i++){
        ch[i + n] = ch[i];
    }
    n *= 2;
    get_sa();
    int l = 1, r = n;
    // for(int i = l; i <= r; i++){
    //     cerr << sa[i] << '\n';
    // }
    // Narrow [l, r] down to at most two candidate ranks.
    while(l + 1 < r){
        int mid = (l + r) >> 1;
        if(check(mid)){
            r = mid;
        }
        else{
            l = mid + 1;
        }
    }
    // Take the first remaining candidate that passes and print its length-t prefix.
    for(int i = l; i <= r; i++){
        if(check(i)){
            for(int j = 1; j <= t; j++){
                cout << ch[sa[i] + j - 1];
            }
            return 0;
        }
    }
    return 0;
}

P4248

SA 套路题,感觉跟 P2178 处理方式一样。

点击查看代码
//これも運命じゃないか
#include<bits/stdc++.h>
using namespace std;
#define int long long
#define uint unsigned long long
#define double long double
#define Air
// Minimal getchar()/putchar() based integer I/O helpers.
namespace io{
    // Reads one optionally signed decimal integer from stdin. Non-digit characters before the
    // number are skipped, each '-' among them flipping the sign. The character is held in an
    // int so EOF can be detected; at EOF the scan stops and the value read so far (0 if none)
    // is returned instead of looping forever.
    inline int read(){
        int f = 1, t = 0; int ch = getchar();
        while(ch != EOF && (ch < '0' || ch > '9')){if(ch == '-') f = -f; ch = getchar();}
        while(ch >= '0' && ch <= '9'){t = t * 10 + ch - '0'; ch = getchar();}
        return t * f;
    }
    // Writes x in decimal to stdout, most significant digit first (recursive).
    inline void write(int x){
        if(x < 0){putchar('-'); x = -x;}
        if(x >= 10){write(x / 10);}
        putchar(x % 10 + '0');
    }
}
using namespace io;
const int N = 1000010;
string ch;
int n;
int sa[N], rk[N];
int tmp[N];
int hg[N];
void get_sa(){
    for(int i = 1; i <= n; i++){
        sa[i] = i;
        rk[i] = ch[i];
    }
    for(int len = 1; len <= n; len <<= 1){
        sort(sa + 1, sa + 1 + n, [&](int x, int y){
            return (rk[x] != rk[y]) ? (rk[x] < rk[y]) : (rk[x + len] < rk[y + len]); 
        });
        for(int i = 1; i <= n; i++){
            tmp[i] = rk[i];
        }
        int tot = 0;
        for(int i = 1; i <= n; i++){
            if(tmp[sa[i]] != tmp[sa[i - 1]] || tmp[sa[i] + len] != tmp[sa[i - 1] + len]){
                tot ++;
            }
            rk[sa[i]] = tot;
        }
    }
}
// Computes hg[r]: the number of leading characters shared by the suffix of rank r and the
// suffix sa[r - 1], visiting suffixes in text order and carrying over the previous match
// length minus one.
void get_hg(){
    int matched = 0;
    for(int pos = 1; pos <= n; pos++){
        const int prev = sa[rk[pos] - 1];
        if(matched > 0) --matched;
        // Extend the match character by character while both suffixes still have characters.
        for(; pos + matched <= n && prev + matched <= n; ++matched){
            if(ch[pos + matched] != ch[prev + matched]) break;
        }
        hg[rk[pos]] = matched;
    }
}
vector<int>e[N];
int sum = 0;
int fa[N];
int siz[N];
int las = 0;
// Disjoint-set find with full path compression: returns the root of x's set and points every
// node on the path from x directly at that root.
int get(int x){
    int root = x;
    while(fa[root] != root) root = fa[root];
    while(fa[x] != root){
        int up = fa[x];
        fa[x] = root;
        x = up;
    }
    return root;
}
void merge(int x, int y){
    int fx = get(x), fy = get(y);
    if(fx == fy) return ;
    fa[fx] = fy;
    sum += siz[fx] * siz[fy];
    // cerr << siz[fx] * siz[fy] << '\n';
    siz[fy] += siz[fx];
}

// Entry point for P4248: starts from the sum of i * (n - 1) over i = 1..n, then sweeps the
// height threshold from n down to 1, merging adjacent ranks whose height equals the threshold
// and subtracting 2 * threshold for every pair that becomes connected at that threshold.
signed main() {
#ifndef Air
    freopen(".in","r",stdin);
    freopen(".out","w",stdout);
#endif
    ios::sync_with_stdio(false);
    cin.tie(0);
    cout.tie(0);
    cin >> ch;
    n = ch.size();
    ch = ' ' + ch; // shift to 1-based indexing
    get_sa();
    get_hg();
    int ans = 0;
    for(int i = 1; i <= n; i++) ans += i * (n - 1);
    // One singleton set per rank.
    for(int i = 1; i <= n; i++){
        fa[i] = i;
        siz[i] = 1;
    }
    // Bucket every adjacent-rank boundary by its height.
    for(int i = 2; i <= n; i++) e[hg[i]].push_back(i);
    for(int h = n; h >= 1; h--){
        las = sum;
        for(auto y: e[h]) merge(y, y - 1);
        ans -= (sum - las) * h * 2; // pairs first connected at threshold h
    }
    cout << ans << '\n';

    return 0;
}

有没有什么办法可以 \(O(1)\) 比较两个子串大小呢,有的兄弟,有的,我们还是先建出来后缀数组,假如说我们现在要比较 \(l1 \to r1\) 与 \(l2 \to r2\) 这两个子串的字典序,那么我们可以记它们的长度分别为 \(s1,s2\),考虑分类讨论:

  • \(\min(s1, s2) \le \text{LCP}(l1, l2)\):此时较短的串是较长的串的前缀,那么就相当于比较 \(s1\) 与 \(s2\) 的大小。
  • 否则两个串在第 \(\text{LCP}(l1, l2) + 1\) 位就分出了大小,相当于比较 \(rk_{l1}\) 与 \(rk_{l2}\) 的大小。

会了这个之后,来看一道题:

BZOJ4310

考虑二分答案,那么我们就需要解决一个问题就是如何快速拿出全部子串,然后我们就发现后缀数组中相邻的两个后缀的前缀的排序是只会相交或相离的,于是我们可以先二分出来答案在哪个后缀里,然后二分这个后缀的前缀来寻找答案。

那么二分答案如何判断呢?

我们可以考虑贪心,从前往后扫,每次就找最长的、满足最大字典序的子串不超过二分的答案的一段,然后如何维护最大字典序的子串呢?我们发现答案一定是目前这个串的一个后缀,然后我们发现起始位置是不变的、每次长度增加,按照上面那个条件可以找到新的后缀什么时候会大于原来的后缀,然后用个优先队列维护之就做完了,挺神仙的。

点击查看代码
//これも運命じゃないか
#include<bits/stdc++.h>
using namespace std;
#define int long long
#define uint unsigned long long
#define double long double
#define Air
// Minimal integer I/O helpers: input goes through cin, output through putchar().
namespace io{
    // Extracts one integer from cin and returns it.
    inline int read(){
        int value;
        cin >> value;
        return value;
    }
    // Writes x in decimal to stdout, most significant digit first (recursive).
    inline void write(int x){
        if(x < 0){
            putchar('-');
            x = -x;
        }
        if(x > 9) write(x / 10);
        putchar('0' + x % 10);
    }
}
using namespace io;
string ch;
int n, m;
const int N = 2e5 + 10;
int sa[N], rk[N];
int tmp[N];
int hg[N];
void get_sa(){
    for(int i = 1; i <= n; i++){
        sa[i] = i;
        rk[i] = ch[i];       
    }
    for(int len = 1; len <= n; len <<= 1){
        sort(sa + 1, sa + 1 + n, [&](int x, int y){
            return (rk[x] != rk[y]) ? (rk[x] < rk[y]) : (rk[x + len] < rk[y + len]);
        });
        for(int i = 1; i <= n; i++){
            tmp[i] = rk[i];
        }
        int tot = 0;
        for(int i = 1; i <= n; i++){
            if(tmp[sa[i]] != tmp[sa[i - 1]] || tmp[sa[i] + len] != tmp[sa[i - 1] + len]){
                tot ++;
            }
            rk[sa[i]] = tot;
        }
    }
}
// Computes hg[r]: the number of leading characters shared by the suffix of rank r and the
// suffix sa[r - 1], visiting suffixes in text order and carrying over the previous match
// length minus one.
void get_hg(){
    int matched = 0;
    for(int pos = 1; pos <= n; pos++){
        const int prev = sa[rk[pos] - 1];
        if(matched > 0) --matched;
        // Extend the match character by character while both suffixes still have characters.
        for(; pos + matched <= n && prev + matched <= n; ++matched){
            if(ch[pos + matched] != ch[prev + matched]) break;
        }
        hg[rk[pos]] = matched;
    }
}
int st[N][30];
int pre[N];
// Fills pre[len] for len in [2, n] with pre[len / 2] + 1, i.e. floor(log2(len)) given that
// pre[0] and pre[1] are still 0.
void prework(){
    int len = 2;
    while(len <= n){
        pre[len] = 1 + pre[len / 2];
        ++len;
    }
}
// Range-minimum query over st on [l, r]; an empty range (l > r) yields 0.
int ask(int l, int r){
    if(r < l) return 0;
    const int k = pre[r - l + 1];
    const int head = st[l][k];
    const int tail = st[r - (1ll << k) + 1][k];
    return min(head, tail);
}
// A substring of ch given by its start position and length.
struct Data{
    int st, len;
    // Lexicographic "x is smaller than y", answered from the suffix ranks and the LCP of the
    // two start positions.
    friend bool operator < (Data x, Data y){
        int lcp;
        if(x.st == y.st){
            lcp = 1e18; // same start: one substring is always a prefix of the other
        }
        else{
            int l = min(rk[x.st], rk[y.st]);
            int r = max(rk[x.st], rk[y.st]);
            lcp = ask(l + 1, r);
        }
        // Only when BOTH substrings extend past the common prefix is the first differing
        // character inside both of them, so the suffix ranks decide. Testing y.len alone
        // misorders the case x.len <= lcp < y.len, where x is a proper prefix of y.
        if(min(x.len, y.len) > lcp){
            return rk[x.st] < rk[y.st];
        }
        // Otherwise the shorter substring is a prefix of the longer one.
        return x.len < y.len;
    }
};
// Pending event: at position `pos`, re-examine the candidate starting at `st`.
struct Ques{
    int pos, st;
    // Reversed ordering so that std::priority_queue pops the smallest pos first
    // (ties: smallest st first).
    friend bool operator < (Ques x, Ques y){
        if(x.pos == y.pos) return x.st > y.st;
        return x.pos > y.pos;
    }
};
priority_queue<Ques>q;
// Greedy feasibility test for the limit substring {x, pos} (start x, length pos).
// Scans the string left to right and forms up to m segments; a segment is cut right before
// position j as soon as the tracked substring `tmp` compares greater than the limit.
// Returns true when all n characters are consumed within m segments.
bool check(int x, int pos){
    int now = 1; // first position not yet assigned to a segment
    for(int i = 1; i <= m; i++){
        int ed = n + 1; // start of the next segment; n + 1 if this one reaches the end
        // tmp: candidate for the largest substring ending at j within the current segment
        // (per the accompanying write-up it is always a suffix of the segment).
        Data tmp = {now, 0};
        while(q.size()) q.pop();
        for(int j = now; j <= n; j++){
            tmp.len ++;
            if(rk[j] > rk[tmp.st]){// the suffix starting at j may overtake tmp later
                int l = min(rk[j], rk[tmp.st]);
                int r = max(rk[j], rk[tmp.st]);
                int lcp = ask(l + 1, r);
                // Re-examine this start once its substring grows past the common prefix.
                q.push({j + lcp, j});
            }
            while(q.size() && q.top().pos == j){
                Data p = {q.top().st, j - q.top().st + 1};
                if(tmp < p){
                    tmp = p;
                }
                else{
                    // tmp may have changed since the event was queued: reschedule against
                    // the current tmp if this start can still overtake it.
                    if(rk[q.top().st] > rk[tmp.st]){
                        int l = min(rk[q.top().st], rk[tmp.st]);
                        int r = max(rk[q.top().st], rk[tmp.st]);
                        int lcp = ask(l + 1, r);
                        q.push({q.top().st + lcp, q.top().st});    
                    }
                }
                q.pop();
            }
            // The segment's tracked substring now exceeds the limit: cut before j.
            if((Data){x, pos} < tmp){
                ed = j;
                break;
            }
        }
        now = ed;
    }
    if(now > n) return 1;
    return 0;
}
// Entry point for BZOJ4310: reads m and the string, builds the suffix array, height array
// and a sparse table over the heights, then runs two binary searches with check(): first
// over suffix ranks (using the whole suffix as the limit), then over the prefix length of
// the suffix found, and prints the resulting substring.
signed main() {
#ifndef Air
    freopen(".in","r",stdin);
    freopen(".out","w",stdout);
#endif
    ios::sync_with_stdio(false);cin.tie(0);cout.tie(0);
    m = read();
    cin >> ch;
    n = ch.size();
    ch = ' ' + ch; // shift to 1-based indexing
    prework();
    get_sa();
    get_hg();
    // Sparse table over hg for range-minimum (LCP) queries.
    for(int i = 1; i <= n; i++){
        st[i][0] = hg[i];
        // cerr << ch[i] << ' '  <<  rk[i] << '\n';
    }
    for(int j = 1; j <= 20; j++){
        for(int i = 1; i + (1ll << j) - 1 <= n; i++){
            st[i][j] = min(st[i][j - 1], st[i + (1ll << (j - 1))][j - 1]);
        }
    }
    int l = 1, r = n;// first find which suffix contains the answer
    // cerr << rk[50] << '\n';
    // cerr << "!! " << check(1, 1e18);
    // return 0;
    // Length 1e18 makes the limit behave as the entire suffix sa[mid].
    while(l + 1 < r){
        int mid = (l + r) >> 1;
        if(check(sa[mid], 1e18)){
            r = mid;
        }
        else{
            l = mid + 1;
        }
    }
    int tp1 = 0; // start position of the first remaining suffix that passes
    for(int i = l; i <= r; i++){
        if(check(sa[i], 1e18)){
            tp1 = sa[i];
            break;
        }
    }
    // Second search: the shortest prefix length of that suffix that still passes.
    l = 1, r = n;
    while(l + 1 < r){
        int mid = (l + r) >> 1;
        if(check(tp1, mid)){
            r = mid;
        }
        else{
            l = mid + 1;
        }
    }
    for(int i = l; i <= r; i++){
        if(check(tp1, i)){
            for(int j = 0; j < i; j++){
                cout << ch[tp1 + j];
            }
            return 0;
        }
    }
    return 0;
}
posted @ 2025-12-26 15:16  Air2011  阅读(9)  评论(0)    收藏  举报