后缀数组专题
后缀数组
luogu P4051 [JSOI2007]字符加密Cipher
算是板子题。首先发现这是一个把字符串循环移位一遍的问题,很容易联想到处理环形 dp 的思路:把原串复制一遍接在后面再跑。
然后对这个倍长串进行后缀排序,即可获得这一堆循环串的排名,然后按排名依次输出每个循环串的最后一个字符就行。但是要注意的是,如果某个后缀的起点落在复制出来的后半段(即它的长度不超过原串长度),那么它并不对应一个完整的循环串,直接跳过即可。
点击查看代码
// N is the capacity of every work array below; M and MAXK are not referenced
// anywhere in the visible part of this program.
const int N = 2e6 + 1, M = 38000, MAXK = 1e6 + 10;
// x / y : rank keys used and swapped by GetSA(); sa : suffix array (1-based
// start positions in sorted order); rk : rank of each suffix; height : common
// prefix length of rank-adjacent suffixes; cnt : counting-sort buckets.
int x[N], y[N], rk[N], height[N], sa[N], cnt[N];
// n : number of characters being sorted; m : largest key value handed to tsort().
int n, m;
// The text whose suffixes are sorted (main() stores the input concatenated with itself).
string s;
// One stable counting-sort pass of the doubling algorithm.
// Reads the candidate order from y[1..n] and the primary key of every suffix
// from x[], and writes the resulting order into sa[1..n].  Every consumed
// y[] slot is reset to 0.  Buckets 1..m are cleared on entry; bucket 0 is
// left untouched, exactly as the prefix sum also skips it.
void tsort()
{
    int key = 1;
    while (key <= m) cnt[key++] = 0;
    for (int pos = 1; pos <= n; ++pos) ++cnt[x[pos]];
    for (key = 2; key <= m; ++key) cnt[key] = cnt[key - 1] + cnt[key];
    // Walk the candidates backwards so that equal keys keep their order.
    for (int idx = n; idx > 0; --idx)
    {
        const int suffix = y[idx];
        const int slot = cnt[x[suffix]]--;
        sa[slot] = suffix;
        y[idx] = 0;
    }
}
void GetSA()
{
for (int i = 1; i <= n; i++) y[i] = i, x[i] = s[i - 1];
tsort();
for (int w = 1; w < n; w <<= 1)
{
int num = 0;
for (int i = n - w + 1; i <= n; i++) y[++num] = i;
for (int i = 1; i <= n; i++) if (sa[i] > w) y[++num] = sa[i] - w;
tsort();
swap(x, y);
x[sa[1]] = num = 1;
for (int i = 2; i <= n; i++)
{
x[sa[i]] = (y[sa[i]] == y[sa[i - 1]] && y[sa[i] + w] == y[sa[i - 1] + w]) ? num : ++num;
}
if (num == n) break;
m = num;
}
}
// Negated sum of all height[] values, accumulated by GetHeight().
// Stored as long long: for the doubled text used here the sum of heights
// grows quadratically with n and does not fit into a 32-bit int, and signed
// overflow is undefined behaviour.
long long ans = 0;
// Fills rk[] (the inverse of sa[]) and height[], where height[r] is the length
// of the longest common prefix of the suffixes ranked r-1 and r (height[1] is
// 0), then subtracts every height from ans.
// Requires sa[1..n] to have been built by GetSA().
void GetHeight()
{
    // Complete inverse permutation, including the smallest suffix.
    for (int i = 1; i <= n; i++) rk[sa[i]] = i;
    for (int i = 1, k = 0; i <= n; i++)
    {
        // The smallest suffix has no predecessor to be compared with.
        if (rk[i] == 1)
        {
            height[1] = 0;
            continue;
        }
        // The previous match length minus one is a valid lower bound.
        if (k) k--;
        const int prev = sa[rk[i] - 1];
        // Explicit bounds keep both indices inside the text instead of
        // relying on the terminator read at s[s.size()].
        while (i + k <= n && prev + k <= n && s[i + k - 1] == s[prev + k - 1]) k++;
        height[rk[i]] = k;
    }
    for (int i = 1; i <= n; i++) ans -= height[i];
}
// Reads one string, sorts all of its rotations through the suffix array of
// the string concatenated with itself, and prints the last character of each
// rotation in sorted order.
signed main()
{
    // freopen("data.in", "r", stdin); freopen("data.out", "w", stdout);
    // freopen("shingen.in", "r", stdin); freopen("shingen.out", "w", stdout);
    ios :: sync_with_stdio(false), cin.tie(0), cout.tie(0);
    cin >> s;
    const int len = s.size();
    n = len * 2;
    s = s + s;
    m = 100000;
    GetSA();
    GetHeight();
    for (int r = 1; r <= 2 * len; ++r)
    {
        // Suffixes starting in the copied half are not full rotations.
        if (sa[r] > len) continue;
        // sa[] is 1-based, s is 0-based: last character of the rotation.
        cout << s[sa[r] + len - 2];
    }
    return 0;
}
luogu P2408 不同子串个数
算是真正的使用了 SA 精髓的第一道题,由于不同子串可以看作是所有子串 - 算重的子串,所以可以计算算重的子串数量。
怎么算呢?
两个相同的子串一定分别是某两个后缀的公共前缀,所以对任意两个后缀,都需要减去它们的 \(lcp\) 长度;但是直接枚举所有后缀对这样做一定会爆炸(WA+TLE),咋办?
发现任意两个后缀的 \(lcp\) 等于它们在排好序的后缀中所夹区间的 \(height\) 最小值(\(lcp\) 为区间 \(min\)),这部分重复在处理排名相邻的后缀时已经被减过一次,所以无需再处理,那么就只需要减去相邻后缀的 \(lcp\) 就好,那不就是 \(height\) 吗。
所以答案就是 \(\frac{n(n+1)}{2}-\sum{height_i}\)
点击查看代码
// Capacity of every array below.
const int N = 1e6 + 100;
// n : length of the text; m : largest key value used by the counting sort.
// S is not referenced anywhere in the visible part of this program.
int n, m, S[N];
// The text, stored 1-based (main() reads into s[1..n]).
char s[N];
// c : counting-sort buckets; x / y : rank keys used and swapped by Get_SA();
// sa : suffix array; rk : rank of each suffix; height : common prefix length
// of rank-adjacent suffixes.
int c[N], x[N], y[N], sa[N], rk[N], height[N];
class String_SA
{
public :
void Get_SA()
{
for (int i = 1; i <= n; i++) ++c[x[i] = s[i]];
for (int i = 2; i <= m; i++) c[i] += c[i - 1];
for (int i = n; i >= 1; i--) sa[c[x[i]]--] = i;
for (int k = 1; k <= n; k <<= 1)
{
int num = 0;
for (int i = n - k + 1; i <= n; i++) y[++num] = i;
for (int i = 1; i <= n; i++) if (sa[i] > k) y[++num] = sa[i] - k;
for (int i = 1; i <= m; i++) c[i] = 0;
for (int i = 1; i <= n; i++) ++c[x[i]];
for (int i = 1; i <= m; i++) c[i] += c[i - 1];
for (int i = n; i >= 1; i--) sa[c[x[y[i]]]--] = y[i], y[i] = 0;
swap(x, y);
x[sa[1]] = 1, num = 1;
for (int i = 2; i <= n; i++) x[sa[i]] = (y[sa[i]] == y[sa[i - 1]] && y[sa[i] + k] == y[sa[i - 1] + k]) ? num : ++num;
if (num == n) break;
m = num;
}
// for (int i = 1; i <= n; i++) cout << sa[i] << ' ';
}
void Get_Height()
{
for (int i = 1; i <= n; i++) rk[sa[i]] = i;
for (int i = 1, k = 0; i <= n; i++)
{
if (rk[i] == 1) {continue;}
if (k) --k;
while (s[i + k] == s[sa[rk[i] - 1] + k]) ++k;
height[rk[i]] = k;
}
}
}SA;
// Reads n and n characters, then prints n(n+1)/2 minus the sum of
// height[2..n].
int main()
{
    // freopen("data.in", "r", stdin); freopen("data.out", "w", stdout);
    cin >> n;
    for (int pos = 1; pos <= n; ++pos) cin >> s[pos];
    m = n + 1000;
    SA.Get_SA();
    SA.Get_Height();
    long long duplicated = 0;
    for (int r = 2; r <= n; ++r) duplicated += height[r];
    const long long total = 1ll * n * (n + 1) / 2;
    cout << total - duplicated;
    return 0;
}
luogu P5546 [POI 2000] 公共串
简单做法:二分 hash。
不简单做法:双指针 SA。
咋用SA做呢?
首先肯定是在 \(height\) 上做文章,而可以把所有串拼起来,然后询问变成区间查询最小值。
当区间越大时,区间最小值单调不升,所以可以想到使用双指针,当前这个区间内包含所有大串时就说明这个串合法,可以更新答案。
这样就是 \(O(n)\) 的了(SA 不计)。
点击查看代码
// N is the capacity of the arrays below and MAXK the number of sparse-table
// levels; V, mod and B are not referenced in the visible part of this program.
const int V = 1e6 + 7, mod = 1e9 + 7, B = 300, MAXK = 22, N = 1e5 + 10; // !!!!!!!!
// cnt : counting-sort buckets, reused by main() as per-string counters;
// n : length of the concatenated text; m : largest sort key; c : number of strings.
int cnt[N], n, m, c;
// Number of distinct source strings inside main()'s current window.
int flag;
// t : the input strings (1-based); s : their concatenation.
string t[N], s;
// belong : source string of each text position; x / y : rank keys;
// rk : rank of each suffix; h : common prefix length of rank-adjacent
// suffixes; sa : suffix array; logn : floor(log2) table; st : sparse table
// of minima over h.
int belong[N], x[N], y[N], rk[N], h[N], sa[N], logn[N], st[MAXK][N];
// One stable counting-sort pass of the doubling algorithm.
// Takes the candidate order from y[1..n] and the primary keys from x[], and
// writes the sorted order to sa[1..n].  Buckets 1..m are assumed to be zero
// on entry and are zeroed again before returning; consumed y[] slots are
// reset to 0.
void tsort()
{
    for (int pos = 1; pos <= n; ++pos) ++cnt[x[pos]];
    for (int key = 1; key <= m; ++key) cnt[key] = cnt[key - 1] + cnt[key];
    // Backwards walk keeps equal keys in their incoming order.
    for (int idx = n; idx > 0; --idx)
    {
        const int suffix = y[idx];
        const int slot = cnt[x[suffix]]--;
        sa[slot] = suffix;
        y[idx] = 0;
    }
    for (int key = 1; key <= m; ++key) cnt[key] = 0;
}
// Builds the suffix array sa[1..n] of the 0-based string s by prefix doubling.
// Sets m to 255 for the first pass and to the number of distinct ranks after
// every completed round.
//
// Fixes relative to the previous version:
//  * the re-ranking loop starts at rank 2; starting at 1 read the unused
//    sa[0] slot and overwrote the rank of sa[1], shifting all ranks to 2..tot;
//  * the doubling stops as soon as every suffix has its own rank.
void GetSA()
{
    m = 255;
    for (int i = 1; i <= n; i++) x[i] = s[i - 1], y[i] = i;
    tsort();
    for (int w = 1; w < n; w <<= 1)
    {
        // Second-key order: suffixes without a second half come first.
        int tot = 0;
        for (int i = n - w + 1; i <= n; i++) y[++tot] = i;
        for (int i = 1; i <= n; i++) if (sa[i] > w) y[++tot] = sa[i] - w;
        tsort();
        // After the swap y[] holds the ranks of the previous round.
        swap(x, y);
        x[sa[1]] = tot = 1;
        for (int i = 2; i <= n; i++) x[sa[i]] = (y[sa[i]] == y[sa[i - 1]] && y[sa[i] + w] == y[sa[i - 1] + w]) ? tot : ++tot;
        // All ranks distinct: further rounds cannot change the order.
        if (tot == n) break;
        m = tot;
    }
}
// Fills rk[] (inverse of sa[]) and h[], where h[r] is the length of the
// longest common prefix of the suffixes ranked r-1 and r; h[1] is 0.
//
// Fix: the smallest suffix is now skipped.  Previously it was compared with
// sa[0] == 0, which read s[-1] on the std::string.  The loop is also bounded
// by the real suffix lengths instead of the unrelated `k <= n`.
void GetHeight()
{
    for (int i = 1; i <= n; i++) rk[sa[i]] = i;
    for (int i = 1, k = 0; i <= n; i++)
    {
        if (rk[i] == 1)
        {
            // No predecessor in sorted order.
            h[1] = 0;
            k = 0;
            continue;
        }
        // The previous match length minus one is a valid lower bound.
        if (k) --k;
        const int prev = sa[rk[i] - 1];
        // sa[] is 1-based while s is 0-based, hence the "- 1".
        while (i + k <= n && prev + k <= n && s[i + k - 1] == s[prev + k - 1]) ++k;
        h[rk[i]] = k;
    }
}
// Builds the floor(log2) table logn[] and the sparse table st[][] of minima
// over h[1..n]; st[lvl][pos] combines the two level lvl-1 entries starting
// at pos and at pos + 2^(lvl-1).
void Build()
{
    for (int pos = 1; pos <= n; ++pos) st[0][pos] = h[pos];
    for (int len = 2; len <= n; ++len) logn[len] = logn[len >> 1] + 1;
    for (int lvl = 1; lvl < MAXK; ++lvl)
    {
        const int half = 1 << (lvl - 1);
        for (int pos = 1; pos + half <= n; ++pos)
        {
            st[lvl][pos] = min(st[lvl - 1][pos], st[lvl - 1][pos + half]);
        }
    }
}
// Returns the minimum of h[l..r] using the sparse table, or 0 for an empty
// range (l > r).
int Query(int l, int r)
{
    if (r < l) return 0;
    const int lvl = logn[r - l + 1];
    const int left = st[lvl][l];
    const int right = st[lvl][r - (1 << lvl) + 1];
    return right < left ? right : left;
}
// Reads c strings and prints the length of the longest substring common to
// all of them: the strings are concatenated (every one but the last followed
// by '|'), and a two-pointer sweep over the suffix array looks for windows
// that contain a suffix of every string.
signed main()
{
    // freopen("data.in", "r", stdin); freopen("data.out", "w", stdout);
    ios :: sync_with_stdio(false), cin.tie(0), cout.tie(0);
    cin >> c;
    for (int id = 1; id <= c; ++id) cin >> t[id];
    // A single string is its own longest common substring.
    if (c == 1)
    {
        cout << t[1].size();
        return 0;
    }
    for (int id = 1; id < c; ++id) t[id].push_back('|');
    for (int id = 1; id <= c; ++id)
    {
        const int piece = t[id].size();
        for (int off = 1; off <= piece; ++off) belong[n + off] = id;
        s += t[id];
        n += piece;
    }
    GetSA();
    GetHeight();
    Build();
    int ans = 0;
    int r = 0;
    for (int l = 1; l <= n; ++l)
    {
        // Grow the window until it covers every source string.
        while (r < n && flag < c)
        {
            ++r;
            int &seen = cnt[belong[sa[r]]];
            if (seen == 0) ++flag;
            ++seen;
        }
        if (flag >= c)
        {
            const int common = Query(l + 1, r);
            if (ans < common) ans = common;
        }
        // Drop the left end before advancing l.
        int &leaving = cnt[belong[sa[l]]];
        --leaving;
        if (leaving == 0) --flag;
    }
    cout << ans;
    QED;
}
luogu P2463 [SDOI2008] Sandy 的卡片
这题其实和上面的题基本上就是一个题,只不过匹配的定义变了:给其中一个串的所有元素同时加上一个数后能得到另一个串,就算这两个串匹配。
初见其实可能没什么思路,但仔细一想就发现了端倪,那便是两个串能匹配当且仅当这两个串的差分数组相同。
这不就是水题了吗?直接照搬上面的代码即可通过。
点击查看代码
// V bounds both dimensions of t; N is the capacity of the linear arrays;
// MAXK is the number of sparse-table levels.  mod and B are not referenced
// in the visible part of this program.
const int V = 1100, mod = 1e9 + 7, B = 300, MAXK = 22, N = 1e6 + 10; // !!!!!!!!
// cnt : counting-sort buckets, reused by main() as per-card counters;
// n : length of the concatenated text; m : largest sort key; c : number of cards.
int cnt[N], n, m, c;
// Number of distinct cards inside main()'s current window.
int flag;
// s : concatenated text (1-based); t[i][j] : j-th value of card i, turned
// into adjacent differences by main().
int s[N], t[V][V];
// belong : card owning each text position; x / y : rank keys; rk : rank of
// each suffix; h : common prefix length of rank-adjacent suffixes; sa :
// suffix array; logn : floor(log2) table; st : sparse table of minima over
// h; siz : number of values on each card.
int belong[N], x[N], y[N], rk[N], h[N], sa[N], logn[N], st[MAXK][N], siz[N];
// One stable counting-sort pass of the doubling algorithm.
// Takes the candidate order from y[1..n] and the primary keys from x[], and
// writes the sorted order to sa[1..n].  Buckets 1..m are assumed to be zero
// on entry and are zeroed again before returning; consumed y[] slots are
// reset to 0.
void tsort()
{
    for (int pos = 1; pos <= n; ++pos) ++cnt[x[pos]];
    for (int key = 1; key <= m; ++key) cnt[key] = cnt[key - 1] + cnt[key];
    // Backwards walk keeps equal keys in their incoming order.
    for (int idx = n; idx > 0; --idx)
    {
        const int suffix = y[idx];
        const int slot = cnt[x[suffix]]--;
        sa[slot] = suffix;
        y[idx] = 0;
    }
    for (int key = 1; key <= m; ++key) cnt[key] = 0;
}
// Builds the suffix array sa[1..n] of the 1-based integer text s[1..n] by
// prefix doubling.  Every symbol is shifted by `base` (defined outside this
// excerpt) to obtain its first sort key; m is set to 6000 for the first pass
// and to the number of distinct ranks after every completed round.
//
// Fixes relative to the previous version:
//  * the re-ranking loop starts at rank 2; starting at 1 read the unused
//    sa[0] slot and overwrote the rank of sa[1], shifting all ranks to 2..tot;
//  * the doubling stops as soon as every suffix has its own rank.
void GetSA()
{
    m = 6000;
    for (int i = 1; i <= n; i++) x[i] = s[i] + base, y[i] = i;
    tsort();
    for (int w = 1; w < n; w <<= 1)
    {
        // Second-key order: suffixes without a second half come first.
        int tot = 0;
        for (int i = n - w + 1; i <= n; i++) y[++tot] = i;
        for (int i = 1; i <= n; i++) if (sa[i] > w) y[++tot] = sa[i] - w;
        tsort();
        // After the swap y[] holds the ranks of the previous round.
        swap(x, y);
        x[sa[1]] = tot = 1;
        for (int i = 2; i <= n; i++) x[sa[i]] = (y[sa[i]] == y[sa[i - 1]] && y[sa[i] + w] == y[sa[i - 1] + w]) ? tot : ++tot;
        // All ranks distinct: further rounds cannot change the order.
        if (tot == n) break;
        m = tot;
    }
}
// Fills rk[] (inverse of sa[]) and h[], where h[r] is the length of the
// longest common prefix of the suffixes ranked r-1 and r; h[1] is 0.
//
// Fixes relative to the previous version:
//  * the smallest suffix is skipped.  It used to be compared with the
//    pseudo-suffix at index sa[0] == 0; because s[0] is 0 and a zero
//    difference is a legal symbol, that comparison could succeed, inflate k
//    and hand a too-large starting length to the next suffix;
//  * comparisons stop at position n.  The zero padding behind the text used
//    to match real zero symbols, so a text ending in zeros got wrong heights.
void GetHeight()
{
    for (int i = 1; i <= n; i++) rk[sa[i]] = i;
    for (int i = 1, k = 0; i <= n; i++)
    {
        if (rk[i] == 1)
        {
            // No predecessor in sorted order.
            h[1] = 0;
            k = 0;
            continue;
        }
        // The previous match length minus one is a valid lower bound.
        if (k) --k;
        const int prev = sa[rk[i] - 1];
        while (i + k <= n && prev + k <= n && s[i + k] == s[prev + k]) ++k;
        h[rk[i]] = k;
    }
}
// Builds the floor(log2) table logn[] and the sparse table st[][] of minima
// over h[1..n]; st[lvl][pos] combines the two level lvl-1 entries starting
// at pos and at pos + 2^(lvl-1).
void Build()
{
    for (int pos = 1; pos <= n; ++pos) st[0][pos] = h[pos];
    for (int len = 2; len <= n; ++len) logn[len] = logn[len >> 1] + 1;
    for (int lvl = 1; lvl < MAXK; ++lvl)
    {
        const int half = 1 << (lvl - 1);
        for (int pos = 1; pos + half <= n; ++pos)
        {
            st[lvl][pos] = min(st[lvl - 1][pos], st[lvl - 1][pos + half]);
        }
    }
}
// Returns the minimum of h[l..r] using the sparse table, or 0 for an empty
// range (l > r).
int Query(int l, int r)
{
    if (r < l) return 0;
    const int lvl = logn[r - l + 1];
    const int left = st[lvl][l];
    const int right = st[lvl][r - (1 << lvl) + 1];
    return right < left ? right : left;
}
// Reads c cards, replaces every card by the differences of adjacent values,
// concatenates the difference sequences (every card but the last is closed
// by the separator 4000) and prints the longest run of differences common to
// all cards, plus one.
//
// Fix: the last card used to keep its raw final number as an extra symbol of
// the text.  That number is not a difference, yet it could match a real
// difference on another card and lengthen the reported answer (cards
// "1 2" and "5 1" produced 2 instead of 1).  The last card now contributes
// only its differences.
signed main()
{
    // freopen("data.in", "r", stdin); freopen("data.out", "w", stdout);
    ios :: sync_with_stdio(false), cin.tie(0), cout.tie(0);
    cin >> c;
    for (int i = 1; i <= c; i++)
    {
        cin >> siz[i];
        for (int j = 1; j <= siz[i]; j++) cin >> t[i][j];
        for (int j = 1; j < siz[i]; j++) t[i][j] = t[i][j + 1] - t[i][j];
        // The final slot holds no difference: it becomes the separator, or is
        // dropped for the last card, which needs none.
        if (i != c) t[i][siz[i]] = 4000;
        else if (siz[i] > 0) siz[i]--;
    }
    for (int i = 1; i <= c; i++)
    {
        for (int j = 1; j <= siz[i]; j++)
        {
            s[n + j] = t[i][j];
            belong[n + j] = i;
        }
        n += siz[i];
    }
    GetSA();
    GetHeight();
    Build();
    int ans = 0;
    for (int l = 1, r = 0; l <= n; l++)
    {
        // Grow the window until it holds a suffix of every card.
        while (r < n && flag < c)
        {
            ++r;
            if (cnt[belong[sa[r]]] == 0) flag++;
            cnt[belong[sa[r]]]++;
        }
        if (flag >= c) ans = max(ans, Query(l + 1, r));
        // Drop the left end before advancing l.
        cnt[belong[sa[l]]]--;
        if (cnt[belong[sa[l]]] == 0) flag--;
    }
    // k common differences correspond to k + 1 card values.
    cout << ans + 1;
    QED;
}
luogu P2336 [SCOI2012] 喵星球上的点名
毒瘤题。
首先考虑怎么使用 SA 直接暴力求出答案,因为匹配一个字符串在排序后的后缀串中一定是连续的,所以问题就变成了对于一个区间求其中的不同数字个数,发现这就是一个莫队题目,然后套两个二分就做完了。
开心使用莫队写完后发现大事不妙,因为最后还要求每个喵被点名的次数,这很恶心啊。
但是我们可以使用差分的思想,以时间为轴,则在询问中的一次添加就代表本询问到结束都计算一次,而删除就代表本询问到结束都删除。
这样差分后,有值的地方恰好就是所有产生贡献的地方。
这样这个题就创过去了。
点击查看代码
// N is the capacity of the arrays below; V, mod, B and MAXK are not
// referenced in the visible part of this program.
const int V = 1100, mod = 1e9 + 7, B = 300, MAXK = 22, N = 1e6 + 10; // !!!!!!!!
// x / y : rank keys; sa : suffix array; cnt : counting-sort buckets, reused
// by add()/del() as per-owner counters; Q : number of queries; n : length of
// the text; m : largest sort key; c : number of cats, later reused as the
// length of the current query pattern; siz : length of every input string.
int x[N], y[N], sa[N], cnt[N], Q, n, m, c, siz[N];
// t : scratch buffer holding the string or pattern just read (1-based);
// s : concatenated text (1-based).
int t[N], s[N];
// One stable counting-sort pass of the doubling algorithm.
// Takes the candidate order from y[1..n] and the primary keys from x[], and
// writes the sorted order to sa[1..n].  Buckets 1..m are assumed to be zero
// on entry and are zeroed again before returning; consumed y[] slots are
// reset to 0.
void tsort()
{
    for (int pos = 1; pos <= n; ++pos) ++cnt[x[pos]];
    for (int key = 1; key <= m; ++key) cnt[key] = cnt[key - 1] + cnt[key];
    // Backwards walk keeps equal keys in their incoming order.
    for (int idx = n; idx > 0; --idx)
    {
        const int suffix = y[idx];
        const int slot = cnt[x[suffix]]--;
        sa[slot] = suffix;
        y[idx] = 0;
    }
    for (int key = 1; key <= m; ++key) cnt[key] = 0;
}
// Builds the suffix array sa[1..n] of the 1-based integer text s[1..n] by
// prefix doubling.  m starts at MAXM (defined outside this excerpt) and is
// replaced by the number of distinct ranks after every round.
void GetSA()
{
    m = MAXM;
    for (int pos = 1; pos <= n; ++pos)
    {
        x[pos] = s[pos];
        y[pos] = pos;
    }
    tsort();
    for (int half = 1; half < n; half *= 2)
    {
        // Second-key order: suffixes without a second half come first.
        int filled = 0;
        for (int pos = n - half + 1; pos <= n; ++pos) y[++filled] = pos;
        for (int r = 1; r <= n; ++r)
        {
            if (sa[r] <= half) continue;
            y[++filled] = sa[r] - half;
        }
        tsort();
        // After the swap y[] holds the ranks of the previous round.
        swap(x, y);
        int classes = 1;
        x[sa[1]] = 1;
        for (int r = 2; r <= n; ++r)
        {
            const int cur = sa[r];
            const int prev = sa[r - 1];
            const bool same = y[cur] == y[prev] && y[cur + half] == y[prev + half];
            if (!same) ++classes;
            x[cur] = classes;
        }
        m = classes;
    }
}
// belong  : owner id of every text position, assigned while the input is read.
// belong2 : block id of a position, consulted by the query ordering of the
//           Mo's-algorithm sweep in main().
int belong[N], belong2[N];
// Compares the pattern t[1..c] with the text starting at position x.
// Returns 1 if the pattern is smaller at the first differing symbol, 0 if it
// is larger, and -1 if all c symbols are equal.
int cmp1(int x)
{
    int j = 1;
    while (j <= c)
    {
        const int pat = t[j];
        const int txt = s[j + x - 1];
        if (pat != txt) return pat < txt ? 1 : 0;
        ++j;
    }
    return -1;
}
// Compares the pattern t[1..c] with the text starting at position x.
// Returns 1 if the pattern is smaller at the first differing symbol, 0 if it
// is larger, and 2 if all c symbols are equal.
int cmp2(int x)
{
    for (int off = 0; off < c; ++off)
    {
        const int pat = t[off + 1];
        const int txt = s[off + x];
        if (txt < pat) return 0;
        if (pat < txt) return 1;
    }
    return 2;
}
// Binary search over the suffix array for the first rank whose suffix is not
// smaller than the pattern t[1..c].  Returns that rank if the suffix starts
// with the pattern, and n + 1 otherwise.
int Findl()
{
    int lo = 1;
    int hi = n;
    int first = n + 1;
    while (lo <= hi)
    {
        const int mid = (lo + hi) >> 1;
        if (cmp2(sa[mid]) > 0)
        {
            first = mid;
            hi = mid - 1;
        }
        else
        {
            lo = mid + 1;
        }
    }
    return cmp2(sa[first]) == 2 ? first : n + 1;
}
// Binary search over the suffix array for the last rank whose suffix starts
// with the pattern t[1..c].  Returns that rank, or 0 if no suffix matches.
//
// Fix: pos now starts at n + 1 ("no suffix is greater than the pattern").
// With the former start value 1 that situation ended at rank 0 and reported
// "no match" even when the largest suffix did start with the pattern.
int Findr()
{
    int l = 1, r = n, pos = n + 1, mid;
    // Find the first rank whose suffix is strictly greater than the pattern.
    while (l <= r)
    {
        mid = (l + r) >> 1;
        if (cmp1(sa[mid]) > 0) r = mid - 1, pos = mid;
        else l = mid + 1;
    }
    // The candidate is the rank just before it.
    pos--;
    if (pos < 1 || cmp1(sa[pos]) != -1) return 0;
    return pos;
}
// One query: the suffix-array rank range [l, r] matched by its pattern and
// its position in the input (main() sets id to 0 when nothing matches).
struct Question
{
int l, r, id;
}q[N];
// res : answer of every query, indexed by id; res2 : per-owner total that
// add()/del() maintain; ans : number of distinct owners in the current window.
int res[N], res2[N], ans;
// Inserts suffix-array rank x into the window while query number qwq (in
// processing order) is being handled.  When its owner enters the window,
// ans grows by one and the owner is credited Q - qwq + 1.
void add(int x, int qwq)
{
    const int owner = belong[sa[x]];
    if (cnt[owner] == 0)
    {
        ++ans;
        res2[owner] += Q - qwq + 1;
    }
    ++cnt[owner];
}
// Removes suffix-array rank x from the window while query number qwq (in
// processing order) is being handled.  When its owner leaves the window,
// ans shrinks by one and the credit Q - qwq + 1 is taken back.
void del(int x, int qwq)
{
    const int owner = belong[sa[x]];
    --cnt[owner];
    if (cnt[owner] != 0) return;
    --ans;
    res2[owner] -= Q - qwq + 1;
}
signed main()
{
// freopen("data.in", "r", stdin); freopen("data.out", "w", stdout);
ios :: sync_with_stdio(false), cin.tie(0), cout.tie(0);
cin >> c >> Q;
int lc = c;
for (int i = 1; i <= 2 * c; i++)
{
cin >> siz[i];
for (int j = 1; j <= siz[i]; j++) cin >> t[j], ++t[j];
if (i != 2 * c) siz[i]++, t[siz[i]] = MAXM;
for (int j = 1; j <= siz[i]; j++) s[n + j] = t[j], belong[n + j] = (i + 1) / 2;
n += siz[i];
}
// for (int i = 1; i <= n; i++) cerr << s[i] << ' ';
// cerr << endl;
GetSA();
for (int i = 1; i <= Q; i++)
{
cin >> c, q[i].id = i;
for (int j = 1; j <= c; j++) cin >> t[j], ++t[j];
int l = Findl(), r = Findr();
// cerr << l << ' ' << r << '\n';
// cerr << s[sa[l]] << ' ';
if (l > r) l = 2, r = 1, q[i].id = 0;
q[i].l = l, q[i].r = r;
}
sort(q + 1, q + 1 + Q, [](Question x, Question y)
{
return (belong2[x.l] ^ belong2[y.l]) ? belong[x.l] < belong[y.l] : (belong2[x.l] & 1) ? x.r < y.r : x.r > y.r;
});
int l = 1, r = 0;
for (int i = 1; i <= Q; i++)
{
while (l > q[i].l) add(--l, i);
while (r < q[i].r) add(++r, i);
while (l < q[i].l) del(l++, i);
while (r > q[i].r) del(r--, i);
res[q[i].id] = ans;
}
for (int i = 1; i <= Q; i++) cout << res[i] << '\n';
for (int i = 1; i <= lc; i++) cout << res2[i] << ' ';
cout << '\n';
QED;
}

浙公网安备 33010602011771号