莫队

~~我非常喜欢暴力算法~~

莫队最先由队长莫涛整理提出。是一种离线算法，处理区间询问。运用了分块的思想

适用性广。各种大佬扩展出了一系列的莫队算法。

复杂度分析玄学。在一些题正解难想、难写时，可考虑用莫队骗分，往往有意想不到的结果。

普通莫队

有两个指针 \(L,R\) 表示当前维护 \([L,R]\) 内的答案

要求在 \(O(1)\) 转移到 \([L,R-1],[L,R+1],[L+1,R],[L-1,R]\)

通过离线、对查询排序，使得暴力转移的复杂度可以接受。

采用分块。以 \(l\) 所在块为第一关键字， \(r\) 为第二关键子排序。

复杂度：设块大小为 \(B\) ，则有 \(\frac{n}{B}\) 个块

对于每一个块， \(r\) 是递增的，最坏移动 \(n\) 次，总共 \(\dfrac{n^2}{B}\) 次
\(l\) 在同一块内单次移动是 \(O(B)\) 的，总共 \(O(mB)\)
跨块移动， \(r\) 最多跳 \(n\) 次，\(l\) 最多跳 \(2B\) 次，最多跨块 \(O(\dfrac{n}{B})\) 次，总共 \(O(\dfrac{n^2}{B}+2n)\)

三部分加起来，大致是 \(O(\dfrac{n^2}{B}+mB)\) 这个级别。由均值不等式，当 \(B=\dfrac{n}{\sqrt m}\) 时取最优 \(O(n\sqrt m)\)

根据题目详情，设置块长，能得到更优的复杂度。

大致的实现：

struct qry {
    int l, r, id;
} q[N];
inline bool cmp(qry A, qry B) {
}

// update current ans
inline void add(int x) {
    
}
inline void del(int x) {
}

inline void solve() {
    sort(q + 1, q + T + 1, cmp);
    L = 1, R = 0;
    for (int i = 1, ql, qr, le; i <= T; i++) {
        ql = q[i].l, qr = q[i].r;
        while (L > ql) add(--L);
        while (L < ql) del(L++);
        while (R > qr) del(R--);
        while (R < qr) add(++R);
        ans[q[i].id] = now;
    }
}

注意！

这四句话，由于 del 操作的存在，顺序需要注意

        while (L > ql) add(--L);
        while (L < ql) del(L++);
        while (R > qr) del(R--);
        while (R < qr) add(++R);

在一些题里，这种写法会有问题：移动中可能出现 \(L>R+1\) 的情况，相当于是多删除了一些数

而如果用 set 维护某些东西，会出现 “删除一个不存在的数” 的问题。

正确的顺序是：先进行 add ，再 del ，这样保证始终 \(L\le R+1\) ，修改后代码如下：

        while (L > ql) add(--L);
        while (R < qr) add(++R);
        while (L < ql) del(L++);
        while (R > qr) del(R--);

根据题目，大部分情况，顺序任意。

奇偶排序

奇数块按 \(r\) 递增排序、偶数块按 \(r\) 递减排序。

优化了 \(R\) 指针跳的次数。

例

[国家集训队] 小 Z 的袜子

对于 \(n=m\) 的情况，块长取 \(\sqrt n\) 即可。

用一个桶 \(t\) 存，要求 \(\sum_{c}\dfrac{1}{2}t_c(t_c-1)\)

#include <bits/stdc++.h>
using namespace std;
typedef unsigned long long uLL;
typedef long double LD;
typedef long long LL;
typedef double db;
const int N = 50005;
int n, T, a[N], pos[N], bz[N];
int size, L, R, tot[N];
LL ans[N], now, all[N], G;
struct qry {
    int l, r, id;
} q[N];
inline bool cmp(qry A, qry B) {
    if (pos[A.l] ^ pos[B.l]) return pos[A.l] < pos[B.l];
    if (pos[A.l] & 1) return A.r < B.r;
    return A.r > B.r;
}
inline void add(int x) {
    now -= tot[a[x]] * (tot[a[x]] - 1) / 2;
    ++tot[a[x]];
    now += tot[a[x]] * (tot[a[x]] - 1) / 2;
}
inline void del(int x) {
    now -= tot[a[x]] * (tot[a[x]] - 1) / 2;
    --tot[a[x]];
    now += tot[a[x]] * (tot[a[x]] - 1) / 2;
}
int main() {
    scanf("%d%d", &n, &T);
    size = sqrt(n);
    for (int i = 1; i <= n; i++) pos[i] = (i - 1) / size + 1;
    for (int i = 1; i <= n; i++) scanf("%d", &a[i]);
    for (int i = 1; i <= T; i++) scanf("%d%d", &q[i].l, &q[i].r), q[i].id = i;
    sort(q + 1, q + T + 1, cmp);
    L = 1, R = 0;
    for (int i = 1, ql, qr, le; i <= T; i++) {
        ql = q[i].l, qr = q[i].r;
        le = qr - ql + 1;
        while (L > ql) add(--L);
        while (L < ql) del(L++);
        while (R > qr) del(R--);
        while (R < qr) add(++R);
        if (le == 1) bz[q[i].id] = 1;
        ans[q[i].id] = now;
        all[q[i].id] = 1ll * le * (le - 1) / 2;
    }
    for (int i = 1; i <= T; i++) {
        if (bz[i]) puts("0/1");
        else G = __gcd(ans[i], all[i]), printf("%lld/%lld\n", ans[i] / G, all[i] / G);
    }
}

技巧

这部分

inline void add(int x) {
    now -= tot[a[x]] * (tot[a[x]] - 1) / 2;
    ++tot[a[x]];
    now += tot[a[x]] * (tot[a[x]] - 1) / 2;
}
inline void del(int x) {
    now -= tot[a[x]] * (tot[a[x]] - 1) / 2;
    --tot[a[x]];
    now += tot[a[x]] * (tot[a[x]] - 1) / 2;
}

会略显繁琐。实际可以化简

add 操作就是 \(now\) 要加上 \(\dfrac{1}{2}(t_c+1)t_c-\dfrac{1}{2}t_c(t_c-1)\) ，就是加上 \(t_c\)

del 操作就是 \(now\) 要减上 \(\dfrac{1}{2}t_c(t_c-1)-\dfrac{1}{2}(t_c-1)(t_c-2)\) ，就是减去 \(t_c-1\)

inline void add(int x) {
    now += tot[a[x]], tot[a[x]]++;
}
inline void del(int x) {
    tot[a[x]]--, now -= tot[a[x]];
}

实际上，这种化简是很常见的。也能优化常数，减小实现难度

带修莫队

国家集训队]数颜色 / 维护队列

带修改，查询颜色种数。

把修改当成第三维，记录当前是到第几次修改。

其实就是弄一个指针，再操作上移动，当前修改多了就改回来，修改少了就改过去。直到次数恰当。

\((L,R,T)\) 到 \((L,R,T+1)\) 和 \((L,R,T-1)\) 是 \(O(1)\) 的。

具体就是修改后将修改值和原值 swap 一下，改回来也只需要 swap 一下，具体见代码

排序以 \(l\) 所在块、 \(r\) 所在块、 \(t\) 的值排序。

复杂度分析：设块长为 \(B\) ，修改 \(c\) 个，查询 \(q\) 个

\(l\) ：同一块移动 \(B\) ，换块移动 \(2B\) ，所有询问加起来就是 \(qB\)
\(r\) ：
1. \(l\) 同块时，同块移动 \(B\) ，换块移动 \(2B\) ，所有询问就是 \(qB\)
2. \(l\) 换块，最多 \(n\) ，换 \(\dfrac{n}{B}\) 次，共 \(\dfrac{n^2}{B}\)
\(t\) ：
1. \(l,r\) 同块，移动 \(c\) ，有 \(\left(\dfrac{n}{B}\right)^2\) 次 \(l,r\) 同块，共 \(\dfrac{n^2c}{B^2}\)
2. \(l\) 或 \(r\) 换块，移动 \(c\) ，共 \((\dfrac{n}{B})^2\) 次换块，共 \(\dfrac{n^2c}{B^2}\)
一般题目中不会明确 \(c,q\) 的个数，统一用 \(m\) 代替

总和： \(O(mB+\dfrac{n^2}{B}+\dfrac{n^2m}{B^2})\)

\(B\) 的最优取值是一个非常复杂的式子。无须纠结。

看作 \(n=m\) ，得到 \(O(nB+\dfrac{n^2}{B}+\dfrac{n^3}{B^2})\)

设 \(B=n^x\) ，得到 \(O(n^{1+x}+n^{2-x}+n^{3-2x})\) ，要 \(\max(1+x,2-x,3-2x)\) 最小

当 \(x=\dfrac{2}{3}\) 时取得，此时 \(B=n^{\frac{2}{3}}\) 。复杂度为 \(O(n^{\frac{5}{3}})\)

#include <bits/stdc++.h>
using namespace std;
typedef unsigned long long uLL;
typedef long double LD;
typedef long long LL;
typedef double db;
const int N = 133335;
int n, m, size, pos[N], a[N];
int tq, clk, now, ans[N];
int cnt[1000005];
int L = 1, R = 0, T = 0;
char op[5];
struct qry {
    int l, r, t, id;
} q[N];
struct mdy {
    int p, c;
} C[N];
inline bool cmp(qry A, qry B) {
    if (pos[A.l] ^ pos[B.l]) return pos[A.l] < pos[B.l];
    if (pos[A.r] ^ pos[B.r]) return pos[A.r] < pos[B.r];
    return A.t < B.t;
}
inline void add(int x) { now += !cnt[a[x]]++; }
inline void del(int x) { now -= !--cnt[a[x]]; }
inline void chan(int t) {
    if (L <= C[t].p && C[t].p <= R) del(C[t].p);
    swap(a[C[t].p], C[t].c);
    if (L <= C[t].p && C[t].p <= R) add(C[t].p);
}
int main() {
    scanf("%d%d", &n, &m);
    size = pow(n, 2.0 / 3.0);
    for (int i = 1; i <= n; i++) pos[i] = (i - 1) / size + 1;
    for (int i = 1; i <= n; i++) scanf("%d", &a[i]);
    for (int i = 1, a, b; i <= m; i++) {
        scanf("%s%d%d", op, &a, &b);
        if (op[0] == 'Q') {
            ++tq, q[tq] = (qry){ a, b, clk, tq };
        } else {
            ++clk, C[clk] = (mdy){ a, b };
        }
    }
    sort(q + 1, q + tq + 1, cmp);
    for (int i = 1, ql, qr, qt; i <= tq; i++) {
        ql = q[i].l, qr = q[i].r, qt = q[i].t;
        while (L < ql) del(L++);
        while (L > ql) add(--L);
        while (R < qr) add(++R);
        while (R > qr) del(R--);
        while (T < qt) chan(++T);
        while (T > qt) chan(T--);
        ans[q[i].id] = now;
    }
    for (int i = 1; i <= tq; i++) printf("%d\n", ans[i]);
}

回滚莫队

在一些题目中，往往出现删除（或增加）非常容易。但是另一个操作却难以实现。

回滚莫队应运而生，解决这种情况

只增不减

数列分块，按 \(l\) 所在块、 \(r\) 的值升序排序。
\(l,r\) 再同一块，暴力处理。
对于同在块 \(T\) 的询问，将莫队左指针设为 \(R_T+1\) ，右指针设为 \(R_T\) ，这是空区间
由于 \(r\) 升序排序，莫队右指针可以一直加，一个块最多 \(n\)

\(l\) 可能乱序，此时左指针从 \(R_T+1\) 出发，加点到达询问位置。

回答完询问，撤销本次移动的影响，注意是撤销，使左端点回到 \(R_T+1\)

每一个询问最多 \(\sqrt{n}\)
相同方式处理下一个块

复杂度依然是 \(O(n\sqrt n)\)

歴史の研究

求 \(\max_c t_c\times c\) ，其中 \(t_c\) 为出现次数。

用桶统计出现次数。加点时更新答案，撤销具体就是在桶中减去出现次数，而不修改答案。

#include <bits/stdc++.h>
using namespace std;
typedef unsigned long long uLL;
typedef long double LD;
typedef long long LL;
typedef double db;
const int N = 100005, SQ = 320;
int n, m, a[N], tt[N], b[N], tl, Bsz, Btot, pr[SQ], po[N];
int L = 1, R = 0, cnt[N], cur, tc[N];
LL ans[N], tmp, now;
struct qry { int l, r, id; } q[N];
inline bool cmp(qry A, qry B) {
    if (po[A.l] ^ po[B.l]) return po[A.l] < po[B.l];
    return A.r < B.r;
}
inline void add(int x) {
    ++cnt[b[x]];
    now = max(now, 1ll * cnt[b[x]] * a[x]);
}
inline void cancel(int x) {
    --cnt[b[x]];
}
int main() {
    scanf("%d%d", &n, &m), Bsz = sqrt(n), Btot = n / Bsz;
    for (int i = 1; i <= n; i++) scanf("%d", &a[i]), tt[i] = a[i];
    sort(tt + 1, tt + n + 1), tl = unique(tt + 1, tt + n + 1) - tt - 1;
    for (int i = 1; i <= n; i++) b[i] = lower_bound(tt + 1, tt + tl + 1, a[i]) - tt;
    for (int i = 1; i <= Btot; i++) pr[i] = i * Bsz;
    if (pr[Btot] < n) pr[++Btot] = n;
    for (int i = 1; i <= n; i++) po[i] = (i - 1) / Bsz + 1;
    for (int i = 1; i <= m; i++) scanf("%d%d", &q[i].l, &q[i].r), q[i].id = i;
    sort(q + 1, q + m + 1, cmp);

    for (int i = 1, ql, qr; i <= m; i++) {
        ql = q[i].l, qr = q[i].r;
        if (po[ql] == po[qr]) {
            for (int j = ql; j <= qr; j++) ++tc[b[j]];
            tmp = 0;
            for (int j = ql; j <= qr; j++) tmp = max(tmp, 1ll * tc[b[j]] * a[j]);
            for (int j = ql; j <= qr; j++) --tc[b[j]];
            ans[q[i].id] = tmp;
            continue;
        }
        if (cur ^ po[ql]) {
            cur = po[ql];
            while (R > pr[cur]) cancel(R--);
            while (L < pr[cur] + 1) cancel(L++);
            now = 0;
        }
        while (R < qr) add(++R);
        tmp = now;
        while (L > ql) add(--L);
        while (L < pr[cur] + 1) cancel(L++);
        ans[q[i].id] = now, now = tmp;
    }
    for (int i = 1; i <= m; i++) printf("%lld\n", ans[i]);
}

只减不增

同理。区别如下：

右端点降序排序
左端点初始为 \(L_T\) ，右端点为 \(n\)

树上莫队

树上莫队一般是用于处理链的问题，子树的可以用 dfs 序解决。

COT2 - Count on a tree II

链上数颜色。用欧拉序（括号序）转换。求

法是 dfs 得到一个点入栈、出栈顺序。分别记为 \(first_x,last_x\)

性质是：欧拉序上 \([first_x,last_x]\) 之间的点都在子树 \(x\) 中，且都出现两次。

考虑将链转为欧拉序上区间问题。对于一条链 \((u,v)\) ，假设 \(first_u\le first_v\) 即 \(u\) 比 \(v\) 先出现。

\(\text{lca}(u,v)=u\) ，说明 \(v\) 在子树 \(u\) 内，直接转为 \([first_u,first_v]\)

同时区间内出现 2 次的点都不要。因为链不会经过一个点两次，经过了两次一定不在链上
\(\text{lca}(u,v)\ne u\) ，用 \([last_x,first_x]\) ，同时 \(\text{lca}(u,v)\) 没有算，需要单独考虑。

实现时：注意序列长度是 \(2n\) ，用一个数组记录是否出现即可实现取重。

#include <bits/stdc++.h>
using namespace std;
typedef unsigned long long uLL;
typedef long double LD;
typedef long long LL;
typedef double db;
const int N = 100005;
int n, T, tt[N], a[N], lt, lst[N], Ecnt, pos[N], size, vis[N];
int fr[N], la[N], clk, fa[N][20], dep[N], ord[N], ans[N], cnt[N];
int L = 1, R = 0, now;
struct qry {
    int l, r, x, id;
} q[N];
inline bool cmp(qry A, qry B) {
    if (pos[A.l] ^ pos[B.l]) return pos[A.l] < pos[B.l];
    if (pos[A.l] & 1) return A.r < B.r;
    return A.r > B.r;
}
struct Ed { int to, nxt; } e[N];
inline void Ae(int fr, int go) {
    e[++Ecnt] = (Ed){ go, lst[fr] }, lst[fr] = Ecnt;
}
void dfs(int u, int ff) {
    fa[u][0] = ff, dep[u] = dep[ff] + 1;
    for (int i = 1; i <= 16; i++) fa[u][i] = fa[fa[u][i - 1]][i - 1];
    ord[++clk] = u, fr[u] = clk;
    for (int i = lst[u], v; i; i = e[i].nxt)
        if ((v = e[i].to) ^ ff) dfs(v, u);
    ord[++clk] = u, la[u] = clk;
}
inline int LCA(int x, int y) {
    if (dep[x] < dep[y]) x ^= y ^= x ^= y;
    for (int i = 16; ~i; i--)
        if (dep[fa[x][i]] >= dep[y]) x = fa[x][i];
    if (x == y) return x;
    for (int i = 16; ~i; i--)
        if (fa[x][i] ^ fa[y][i]) x = fa[x][i], y = fa[y][i];
    return fa[x][0];
}
inline void work(int x) {
    if (vis[x]) now -= !--cnt[a[x]];
    else now += !cnt[a[x]]++;
    vis[x] ^= 1;
}
int main() {
    scanf("%d%d", &n, &T);
    for (int i = 1; i <= n; i++) scanf("%d", &tt[i]), a[i] = tt[i];
    sort(tt + 1, tt + n + 1);
    lt = unique(tt + 1, tt + n + 1) - tt - 1;
    for (int i = 1; i <= n; i++)
        a[i] = lower_bound(tt + 1, tt + lt + 1, a[i]) - tt;
    for (int i = 1, u, v; i < n; i++) {
        scanf("%d%d", &u, &v);
        Ae(u, v), Ae(v, u);
    }
    dfs(1, 0);
    size = sqrt(clk);
    for (int i = 1; i <= clk; i++) pos[i] = (i - 1) / size + 1;
    for (int i = 1, u, v, ll; i <= T; i++) {
        scanf("%d%d", &u, &v);
        ll = LCA(u, v);
        if (fr[u] > fr[v]) u ^= v ^= u ^= v;
        if (u == ll) q[i] = (qry){ fr[u], fr[v], 0, i };
        else q[i] = (qry){ la[u], fr[v], ll, i };
    }
    sort(q + 1, q + T + 1, cmp);
    for (int i = 1, ql, qr, qx; i <= T; i++) {
        ql = q[i].l, qr = q[i].r, qx = q[i].x;
        while (L < ql) work(ord[L++]);
        while (L > ql) work(ord[--L]);
        while (R < qr) work(ord[++R]);
        while (R > qr) work(ord[R--]);
        if (qx) work(qx);
        ans[q[i].id] = now;
        if (qx) work(qx);
    }
    for (int i = 1; i <= T; i++) printf("%d\n", ans[i]);
}

二维莫队

YALI 往年集训题处的做法。

D2T2 蔬菜

求矩阵中颜色出现次数的平方和。

查询放到了矩阵中。按照四个端点所在块排序。

不同于普通莫队，这里一次修改为 \(O(n)\) 的。

块长随缘了，分析累了。

但是码量、速度碾标算的四维偏序

#include <bits/stdc++.h>using namespace std;typedef unsigned long long uLL;typedef long double LD;typedef long long LL;typedef double db;const int N = 205;int n, m, Ti, a[N][N], b[N * N], le, t[N * N], now, Bsz;struct qry {    int xx, yy, x2, y2, id;} q[100005];int ans[100005];inline bool cmp(qry A, qry B) {    if (A.xx / Bsz != B.xx / Bsz)        return A.xx / Bsz < B.xx / Bsz;    if (A.yy / Bsz != B.yy / Bsz)        return A.yy / Bsz < B.yy / Bsz;    if (A.x2 / Bsz != B.x2 / Bsz)        return A.x2 / Bsz < B.x2 / Bsz;    return A.y2 < B.y2;}inline void add(int x) { now += 1 + 2 * t[x], ++t[x]; }inline void del(int x) { now += 1 - 2 * t[x], --t[x]; }inline void pls(int i, int l, int r, int op) {    for (int j = l; j <= r; j++) add(op ? a[i][j] : a[j][i]);}inline void rmv(int i, int l, int r, int op) {    for (int j = l; j <= r; j++) del(op ? a[i][j] : a[j][i]);}int main() {    // freopen("vegetable.in", "r", stdin);    // freopen("vegetable.out", "w", stdout);    scanf("%d%d%d", &n, &m, &Ti);    for (int i = 1; i <= n; i++)        for (int j = 1; j <= m; j++) scanf("%d", &a[i][j]), b[++le] = a[i][j];    Bsz = sqrt(max(n, m));    sort(b + 1, b + le + 1), le = unique(b + 1, b + le + 1) - b - 1;    for (int i = 1; i <= n; i++)        for (int j = 1; j <= m; j++) a[i][j] = lower_bound(b + 1, b + le + 1, a[i][j]) - b;    for (int i = 1; i <= Ti; i++) scanf("%d%d%d%d", &q[i].xx, &q[i].yy, &q[i].x2, &q[i].y2), q[i].id = i;    sort(q + 1, q + Ti + 1, cmp);    int li = 1, ri = 0, lj = 1, rj = 0;    for (int i = 1, ql, qr; i <= Ti; i++) {        while (li < q[i].xx) rmv(li++, lj, rj, 1);        while (li > q[i].xx) pls(--li, lj, rj, 1);        while (ri < q[i].x2) pls(++ri, lj, rj, 1);        while (ri > q[i].x2) rmv(ri--, lj, rj, 1);        while (lj < q[i].yy) rmv(lj++, li, ri, 0);        while (lj > q[i].yy) pls(--lj, li, ri, 0);        while (rj < q[i].y2) pls(++rj, li, ri, 0);        while (rj > q[i].y2) rmv(rj--, li, ri, 0);        ans[q[i].id] = now;    }    for (int i = 1; i <= Ti; i++) printf("%d\n", ans[i]);}

莫队二次离线

先 % 提出者 lxl ，用于解决莫队转移复杂度高的情况

【模板】莫队二次离线

求区间内满足条件数对个数，普通莫队是 \(O(n\sqrt n\binom{14}{k})\) ，难以接受，但是注意到可以差分

设 \(f(x,l,r)\) 表示 \(x\) 与 \([l,r]\) 中的点形成的合法数对个数，莫队的指针有四种移动方法，讨论一波。

设莫队指针为 \(L,R\) ，询问区间为 \(ql,qr\)

\(L<ql\) ，从 \([L,R]\) 到 \([L+1,R]\) ，减去 \(f(L,L+1,R)=f(L,1,R)-f(L,1,L)\)
\(L>ql\) ，从 \([L,R]\) 到 \([L-1,R]\) ，加上 \(f(L-1,L,R)=f(L-1,1,R)-f(L-1,1,L-1)\)
\(R<qr\) ，从 \([L,R]\) 到 \([L,R+1]\) ，加上 \(f(R+1,L,R)=f(R+1,1,R)-f(R+1,1,L-1)\)
\(R>qr\) ，从 \([L,R]\) 到 \([L,R-1]\) ，减去 \(f(R,L,R-1)=f(R,1,R-1)-f(R,1,L-1)\)

发现如果与处理出 \(f(x,1,x)\) 和 \(f(x,1,x-1)\) ，就解决了一半。

移动指针时加（或减）即可。前缀和优化减小常数

剩下的一部分的贡献和：形如 \(\sum_{x=l}^r f(x,1,pos)\) ，二次离线，在 \(pos\) 处挂一个 \((l,r,id,op)\)

其中 \(id\) 是询问编号， \(op\) 是 1 或 -1 的系数。枚举每一个位置，暴力求解即可。

由于考虑的是变化量，所以答案应该求前缀和。

复杂度是 \(O(n\sqrt n + n\binom{14}{k})\)

#include <bits/stdc++.h>
#define pb push_back
using namespace std;
typedef unsigned long long uLL;
typedef long double LD;
typedef long long LL;
typedef double db;
const int N = 100005;
int n, m, K, a[N], b[N], Tb, cnt[N], pos[N], Bsz;
LL pre1[N], pre2[N], ans[N], out[N];
struct qry { int l, r, id; } q[N];
struct add { int l, r, id, op; } nw;
vector<add> cf[N];
inline bool cmp(qry A, qry B) {
    if (pos[A.l] ^ pos[B.l]) return pos[A.l] < pos[B.l];
    return A.r < B.r;
}
int main() {
    scanf("%d%d%d", &n, &m, &K);
    for (int i = 1; i <= n; i++) scanf("%d", &a[i]);
    if (K > 14) {
        for (int i = 1; i <= m; i++) puts("0");
        return 0;
    }
    for (int i = 0, j, t; i < 16384; i++) {
        j = i, t = 0; while (j) j -= j & -j, ++t;
        if (t == K) b[++Tb] = i;
    }
    // pre1 : f(i, 1, i - 1)
    // pre2 : f(i, 1, i)
    for (int i = 1; i <= n; i++) {
        pre1[i] = pre1[i - 1] + cnt[a[i]];
        for (int j = 1; j <= Tb; j++) ++cnt[a[i] ^ b[j]];
        pre2[i] = pre2[i - 1] + cnt[a[i]];
    }
    Bsz = sqrt(n);
    for (int i = 1; i <= n; i++) pos[i] = (i - 1) / Bsz + 1;
    for (int i = 1; i <= m; i++) scanf("%d%d", &q[i].l, &q[i].r), q[i].id = i;
    sort(q + 1, q + m + 1, cmp);
    for (int i = 1, L = 1, R = 0, ql, qr; i <= m; i++) {
        ql = q[i].l, qr = q[i].r;
        ans[i] = pre2[ql - 1] - pre2[L - 1] + pre1[qr] - pre1[R];
        if (L < ql) cf[R].pb((add){ L, ql - 1, i,-1 });
        if (L > ql) cf[R].pb((add){ ql, L - 1, i, 1 });
        L = ql;
        // attention : pointer L moves to ql first
        if (R < qr) cf[L - 1].pb((add){ R + 1, qr, i,-1 });
        if (R > qr) cf[L - 1].pb((add){ qr + 1, R, i, 1 });
        R = qr;
        // -= f(L, L + 1, R) = -f(L, 1, R) + f(L, 1, L);
        // += f(L - 1, L, R) = +f(L - 1, 1, R) - f(L - 1, 1, L - 1)
        // += f(R + 1, L, R) = +f(R + 1, 1, R) - f(R + 1, 1, L - 1);
        // -= f(R, L, R - 1) = -f(R, 1, R - 1) + f(R, 1, L - 1);
    }
    memset(cnt, 0, sizeof(cnt));
    for (int i = 1; i <= n; i++) {
        for (int j = 1; j <= Tb; j++) cnt[a[i] ^ b[j]]++;
        for (int j = 0, Lj = cf[i].size(); j < Lj; j++) {
            nw = cf[i][j];
            register LL tmp = 0;
            for (int k = nw.l; k <= nw.r; k++) tmp += cnt[a[k]];
            ans[nw.id] += nw.op * tmp;
        }
    }
    for (int i = 1; i <= m; i++) ans[i] += ans[i - 1], out[q[i].id] = ans[i];
    for (int i = 1; i <= m; i++) printf("%lld\n", out[i]);
}

莫队加 bitset

bitset 有 \(\frac{n}{64}\) 的复杂度

往往解决区间内和/差/积/max等值域相关问题

小清新人渣的本愿

[Ynoi2017] 由乃的玉米田（比上一题多了除法）

查询是否存在 \(a,b\) 满足相加/减/乘/除等于 \(x\)

和/差用 bitset 解决，乘积直接暴力。

减法：相当于存在 \((a-x,a)\) 用一个 bitset \(s1\) ，第 \(i\) 位记录是否存在 \(i\) 。

如果 s1 & (s1 << x) 有任意一位是 1 ，说明可行。
加法：相当于存在 \((x-a,a)\) ，需要一个反的 bitset \(s2\) ，记录是否存在 \(max-i\)

那么 s2 >> (max - k) 得到是否存在 \((max-i)-(max-k)=k-i\) 了。

s1 & (s2 >> (max - k)) 存在 1 说明可行
乘法：分解质因数。根号级别。

根号分治

解决除法操作：

\(x\le\sqrt{max}\) ，将所有满足情况的 \(x\) 存下来单独处理。

对于同一个 \(x\) ，枚举所有数，维护 \(lst_v\) 表示 \(v\) 最后出现位置，

还有 \(ml_i\) 表示以 \(i\) 为右端点最大的左端点，使得 \([ml_i,i]\) 内存在满足条件的数对。
否则，暴力枚举 \(x\) 的倍数

code

时间复杂度：\(O(n\sqrt n+\dfrac{T\times max}{64}+T\sqrt{max})\) ，十分的鬼畜。。

给出由乃的玉米田的代码

#include <bits/stdc++.h>
using namespace std;
typedef unsigned long long uLL;
typedef long double LD;
typedef long long LL;
typedef double db;
const int N = 100005;
int n, T, a[N], pos[N], unit, ans[N], cnt[N], L = 1, R = 0, mx, lim, lst[N], ml[N], Qtot;
bitset<N> s1, s2;
struct qry { int l, r, x, op, id; } q[N];
vector<qry> q2[320];
inline bool cmp(qry A, qry B) {
    if (pos[A.l] ^ pos[B.l]) return pos[A.l] < pos[B.l];
    return A.r < B.r;
}
inline void add(int x) {
    ++cnt[x = a[x]];
    if (cnt[x] == 1) s1[x] = s2[100000 - x] = 1;
}
inline void del(int x) {
    --cnt[x = a[x]];
    if (!cnt[x]) s1[x] = s2[100000 - x] = 0;
}
int main() {
    scanf("%d%d", &n, &T);
    unit = sqrt(n);
    for (int i = 1; i <= n; i++) pos[i] = (i - 1) / unit + 1;
    for (int i = 1; i <= n; i++) scanf("%d", &a[i]), mx = max(mx, a[i]);
    lim = sqrt(mx);
    for (int i = 1, op, l, r, x; i <= T; i++) {
        scanf("%d%d%d%d", &op, &l, &r, &x);
        if (op == 4 && x <= lim) q2[x].push_back((qry){ l, r, x, op, i });
        else q[++Qtot] = (qry){ l, r, x, op, i };
    }
    for (int w = 1; w <= lim; w++) {
        memset(lst, 0, sizeof(lst));
        memset(ml, 0, sizeof(ml));
        if (q2[w].empty()) continue;
        for (int i = 1, p = 0; i <= n; i++) {
            lst[a[i]] = i;
            if (w * a[i] <= mx) p = max(p, lst[w * a[i]]);
            if (a[i] % w == 0) p = max(p, lst[a[i] / w]);
            ml[i] = p;
        }
        for (qry qq : q2[w]) ans[qq.id] = (qq.l <= ml[qq.r]);
    }
    sort(q + 1, q + Qtot + 1, cmp);
    for (int i = 1, ql, qr, qx; i <= Qtot; i++) {
        ql = q[i].l, qr = q[i].r, qx = q[i].x;
        while (L < ql) del(L++);
        while (L > ql) add(--L);
        while (R < qr) add(++R);
        while (R > qr) del(R--);
        if (q[i].op == 1) ans[q[i].id] = (s1 & (s1 >> qx)).any();
        else if (q[i].op == 2) ans[q[i].id] = (s1 & (s2 >> (n - qx))).any();
        else if (q[i].op == 3) {
            for (int j = 1; j * j <= qx; j++)
                if (qx % j == 0)
                    if (s1[j] && s1[qx / j]) {
                        ans[q[i].id] = 1; break;
                    }
        } else {
            for (int j = 1; j * qx <= mx; j++)
                if (s1[j] && s1[j * qx]) { ans[q[i].id] = 1; break; }
        }
    }
    for (int i = 1; i <= T; i++) puts(ans[i] ? "yuno" : "yumi");
}

应用

[WC2013] 糖果公园

树上询问路径，求 \(\sum_c val_c\sum_{i=1}^{cnt_c}w_i\) ，带修改。

树上带修莫队，弄出欧拉序后写带修莫队，板子题，锻炼代码实现。。

#include <bits/stdc++.h>
#define ri register int
using namespace std;
char buf[100000], *Sf = buf, *Tf = buf;
inline char G() {
	return Sf == Tf && (Tf = (Sf = buf) + fread(buf, 1, 100000, stdin), Sf == Tf) ? EOF : *Sf++;
}
inline int Rd() {
	ri x = 0;
	static char C = G();
	for (; C < '0' || C > '9'; C = G()) ;
	for (; C > '/' && C < ':'; C = G()) x = (x << 1) + (x << 3) + (C ^ 48);
	return x;
}
typedef long long LL;
const int N = 200005;
int n, m, Ti, v[N], w[N], a[N], lst[N], Ecnt;
int fr[N], la[N], ord[N], clk, Tc, Tq, sz[N], top[N];
int fa[N], dep[N], Bs, pos[N], vis[N], sn[N];
int L, R, T, cnt[N];
LL ans[N], now;
struct qry { int l, r, t, x, id; } q[N];
struct chg { int p, x; } C[N];
struct Ed { int to, nxt; } e[N << 1];
inline void Ae(ri fr, ri go) {
    e[++Ecnt] = (Ed){ go, lst[fr] }, lst[fr] = Ecnt;
}
void dfs(ri u, ri ff) {
    fr[u] = ++clk, ord[clk] = u;
	dep[u] = dep[fa[u] = ff] + 1, sz[u] = 1;
    for (ri i = lst[u], v; i; i = e[i].nxt)
		if ((v = e[i].to) ^ ff) {
			dfs(v, u), sz[u] += sz[v];
			if (sz[v] > sz[sn[u]]) sn[u] = v;
		}
    la[u] = ++clk, ord[clk] = u;
}
void dfs2(ri u, ri nw) {
	top[u] = nw; if (!sn[u]) return; dfs2(sn[u], nw);
	for (ri i = lst[u], v; i; i = e[i].nxt)
	if ((v = e[i].to) ^ fa[u] && v ^ sn[u]) dfs2(v, v);
}
inline int LCA(ri x, ri y) {
	for (;top[x] ^ top[y]; x = fa[top[x]])
		if (dep[top[x]] < dep[top[y]]) x ^= y ^= x ^= y;
	return dep[x] < dep[y] ? x : y;
}
inline bool cmp(qry A, qry B) {
    if (pos[A.l] ^ pos[B.l]) return pos[A.l] < pos[B.l];
    if (pos[A.r] ^ pos[B.r]) return pos[A.r] < pos[B.r];
    return A.t < B.t;
}
inline void add(ri x) { now += 1ll * w[++cnt[x]] * v[x]; }
inline void del(ri x) { now -= 1ll * w[cnt[x]--] * v[x]; }
inline void work(ri x) { (vis[x] ^= 1) ? add(a[x]) : del(a[x]); }
inline void chan(ri t) {
    if (vis[C[t].p]) del(a[C[t].p]), add(C[t].x);
    swap(a[C[t].p], C[t].x);
}
int main() {
    n = Rd(), m = Rd(), Ti = Rd();
    for (ri i = 1; i <= m; i++) v[i] = Rd();
    for (ri i = 1; i <= n; i++) w[i] = Rd();
    for (ri i = 1, u, v; i < n; i++) {
        u = Rd(), v = Rd(), Ae(u, v), Ae(v, u);
    }
    for (ri i = 1; i <= n; i++) a[i] = Rd();
    dfs(1, 0), dfs2(1, 1);
    for (ri op, x, y, ff; Ti--; ) {
    	op = Rd(), x = Rd(), y = Rd();
        if (!op) ++Tc, C[Tc] = (chg){ x, y };
        else {
            ff = LCA(x, y), ++Tq;
            if (fr[x] > fr[y]) x ^= y ^= x ^= y;
            if (x == ff) q[Tq] = (qry){ fr[x], fr[y], Tc, 0, Tq };
            else q[Tq] = (qry){ la[x], fr[y], Tc, ff, Tq };
        }
    }
    Bs = pow(clk, 2.0 / 3.0);
    for (ri i = 1; i <= clk; i++) pos[i] = (i - 1) / Bs + 1;
    sort(q + 1, q + Tq + 1, cmp);
    L = 1, R = 0, T = 0;
    for (ri i = 1, ql, qr, qt; i <= Tq; i++) {
        ql = q[i].l, qr = q[i].r, qt = q[i].t;
        while (L < ql) work(ord[L++]);
        while (L > ql) work(ord[--L]);
        while (R < qr) work(ord[++R]);
        while (R > qr) work(ord[R--]);
        while (T < qt) chan(++T);
        while (T > qt) chan(T--);
        if (q[i].x) work(q[i].x);
        ans[q[i].id] = now;
        if (q[i].x) work(q[i].x);
    }
    for (ri i = 1; i <= Tq; i++) printf("%lld\n", ans[i]);
}

[SNOI2017]一个简单的询问

求 \(\sum_x\text{get}(l_1,r_1,x)\times\text{get}(l_2,r_2,x)\) ，其中 \(\text{get}(l,r,x)\) 表示 \([l,r]\) 中 \(x\) 的个数

差分，得 \(\sum_x(\text{get}(1,r_1,x)-\text{get}(1,l_1-1,x))(\text{get}(1,r_2,x)-\text{get}(1,l_2-1,x))\)

令 \(g(p,x)=\text{get}(1,p,x)\) ，

得 \(\sum_{x}g(r_1,x)g(r_2,x)-g(r_1,x)g(l_2-1,x)-g(l_1-1,x)g(r_2,x)+g(l_1-1,x)g(l_2-1,x)\)

拆成四个询问。用两个数组维护。

这道题其实是运用了莫队的思想，并不是经典的莫队。

#include <bits/stdc++.h>
using namespace std;
typedef unsigned long long uLL;
typedef long double LD;
typedef long long LL;
typedef double db;
const int N = 50005;
int n, a[N], tq, T, sz, pos[N], cnt[N][2];
LL ans[N], now;
int L = 0, R = 0;
struct qry {
    int a, b, op, id;
} q[N << 2];
inline bool cmp(qry A, qry B) {
    if (pos[A.a] != pos[B.a]) return pos[A.a] < pos[B.a];
    return A.b < B.b;
}
inline void add(int x, int w) {
	now += 1ll * cnt[a[x]][w];
    ++cnt[a[x]][w ^ 1];
}
inline void del(int x, int w) {
	now -= 1ll * cnt[a[x]][w];
    --cnt[a[x]][w ^ 1];
}
int main() {
    scanf("%d", &n);
    sz = sqrt(n);
    for (int i = 1; i <= n; i++) pos[i] = (i - 1) / sz + 1;
    for (int i = 1; i <= n; i++) scanf("%d", &a[i]);
    scanf("%d", &T);
    for (int i = 1, a, b, c, d; i <= T; i++) {
        scanf("%d%d%d%d", &a, &b, &c, &d);
        q[++tq] = (qry){ a - 1, c - 1, 1, i };
        q[++tq] = (qry){ a - 1, d, -1, i };
        q[++tq] = (qry){ b, d, 1, i };
        q[++tq] = (qry){ b, c - 1, -1, i };
    }
    for (int i = 1; i <= tq; i++)
        if (q[i].a > q[i].b) swap(q[i].a, q[i].b);
    sort(q + 1, q + tq + 1, cmp);
    for (int i = 1, ql, qr; i <= tq; i++) {
        ql = q[i].a, qr = q[i].b;
        while (L < ql) add(++L, 0);
        while (L > ql) del(L--, 0);
        while (R < qr) add(++R, 1);
        while (R > qr) del(R--, 1);
        ans[q[i].id] += now * q[i].op;
    }
    for (int i = 1; i <= T; i++) printf("%lld\n", ans[i]);
}

[Ynoi2016] 这是我自己的发明

前置知识：上一题。将询问放到了子树上，用 dfs 序转换。

换根对于一个 \(x\) 只有两种情况：根在不在子树 \(x\) 中（当根为 1 时）

对于根在子树 \(x\) 的情况其实就是整一棵树挖掉 \(x\) 的某一个儿子所在子树。

这个儿子所在子树是包含根的，开 map 按 \(dfn\) 挂上所有儿子，用新的根的 \(dfn\) 一次 lower_bound 即可。

询问要么是一个区间，要么是 \([1,n]\) 挖掉一个区间

设 \(f([a,b],[c,d])\) 表示区间 \([a,b]\) 和 \([c,d]\) 的答案，\(pre_i=f([1,i],[1,n])\) 有两种情况较特殊

\[f([l,r],[1,L)\cup(R,n])=f([l,r],[1,n])-f([l,r],[L,R])=pre_r-pre_l-f([l,r],[L,R])\\ \]

和

\[f([1,l)\cup(r,n],[1,L)\cup(R,n])=\\f([1,n],[1,n])-f([l,r],[1,n])-f([1,n],[L,R])+ f([l,r],[L,R])=\cdots \]

发现其实只用求 \(f([l,r],[L,R])\) ，只拆成四个询问即可

#include <bits/stdc++.h>
using namespace std;
typedef unsigned long long uLL;
typedef long double LD;
typedef long long LL;
typedef double db;
const int N = 100005, M = 500005;
int n, T, a[N], b[N], lst[N], ecnt, clk, st[N], ed[N], Rt;
int tq, pos[N], sz, cnt[N][2], L = 0, R = 0, tc[N];
LL now, ans[M], pre[N];
struct qry {
    int a, b, op, id;
} q[M << 2];
struct Ed { int to, nxt; } e[N << 1];
inline void Ae(int fr, int go) {
    e[++ecnt] = (Ed){ go, lst[fr] }, lst[fr] = ecnt;
}
map<int, int> ch[N];
void dfs(int u, int ff) {
    st[u] = ++clk;
    for (int i = lst[u], v; i; i = e[i].nxt)
        if ((v = e[i].to) ^ ff) dfs(v, u), ch[u][ed[v]] = v;
    ed[u] = clk;
}
inline bool cmp(qry A, qry B) {
    if (pos[A.a] ^ pos[B.a]) return pos[A.a] < pos[B.a];
    return A.b < B.b;
}
inline void add(int x, int w) {
    ++cnt[a[x]][w];
    now += cnt[a[x]][w ^ 1];
}
inline void del(int x, int w) {
    --cnt[a[x]][w];
    now -= cnt[a[x]][w ^ 1];
}
int main() {
    scanf("%d%d", &n, &T);
    sz = n / sqrt(T);
    for (int i = 1; i <= n; i++) pos[i] = (i - 1) / sz + 1;
    for (int i = 1; i <= n; i++) scanf("%d", &a[i]), b[i] = a[i];
    sort(a + 1, a + n + 1);
    tq = unique(a + 1, a + n + 1) - a - 1;
    for (int i = 1; i <= n; i++)
        b[i] = lower_bound(a + 1, a + tq + 1, b[i]) - a;
    for (int i = 1, u, v; i < n; i++) {
        scanf("%d%d", &u, &v);
        Ae(u, v), Ae(v, u);
    }
    Rt = 1;
    dfs(1, 0);
    tq = 0;
    for (int i = 1; i <= n; i++) ++tc[a[st[i]] = b[i]];
    for (int i = 1; i <= n; i++) pre[i] = pre[i - 1] + tc[a[i]];
    for (int i = 1, op, x, y, tx, ty, a, b, c, d; i <= T; i++) {
        scanf("%d%d", &op, &x);
        if (op == 1) Rt = x, --i, --T;
        else {
            scanf("%d", &y);
            tx = (st[x] <= st[Rt] && ed[Rt] <= ed[x]); if (x == Rt) x = 1, tx = 0;
            ty = (st[y] <= st[Rt] && ed[Rt] <= ed[y]); if (y == Rt) y = 1, ty = 0;
            if (tx) x = ch[x].lower_bound(st[Rt])->second;
            if (ty) y = ch[y].lower_bound(st[Rt])->second;
            a = st[x] - 1, b = ed[x], c = st[y] - 1, d = ed[y];
            if (tx && ty) ans[i] += pre[n];
            op = (tx == ty ? -1 : 1);
            if (tx) ans[i] += (pre[d] - pre[c]) * op;
            if (ty) ans[i] += (pre[b] - pre[a]) * op;
            q[++tq] = (qry){ a, c, -op, i };
            q[++tq] = (qry){ a, d, op, i };
            q[++tq] = (qry){ b, d, -op, i };
            q[++tq] = (qry){ b, c, op, i };
        }
    }
    sort(q + 1, q + tq + 1, cmp);
    for (int i = 1, ql, qr; i <= tq; i++) {
        ql = q[i].a, qr = q[i].b;
        while (L < ql) add(++L, 0);
        while (L > ql) del(L--, 0);
        while (R < qr) add(++R, 1);
        while (R > qr) del(R--, 1);
        ans[q[i].id] += now * q[i].op;
    }
    for (int i = 1; i <= T; i++) printf("%lld\n", ans[i]);
}

Gty的二逼妹子序列

求区间内值在 \([a,b]\) 范围内的数的个数。

我会树状数组！

\(O(n\sqrt n\log n)\) ，会超时。

根号平衡

运用分块的优越性质：分块可以实现 \(O(1)\) 插入， \(O(\sqrt n)\) 查询。或者 \(O(\sqrt n)\) 插入 \(O(1)\) 查询。

莫队需要大量的插入操作，这使得复杂度均匀为 \(O(\log n)\) 的 bit 难以承受。

这到题，对值域分块，实现 \(O(1)\) 插入 \(O(\sqrt n)\) 查询，总共 \(O((n+m)\sqrt n)\)

#include <bits/stdc++.h>
using namespace std;
typedef unsigned long long uLL;
typedef long double LD;
typedef long long LL;
typedef double db;
const int N = 100005, M = 1000005, SQ = 1005;
int n, m, a[N], Bsz, Btot, pos[N], pl[SQ], pr[SQ];
int L, R, ans[M], cnt[N], sm[SQ];
struct qry { int l, r, a, b, id; } q[M];
inline bool cmp(qry A, qry B) {
    if (pos[A.l] ^ pos[B.l]) return pos[A.l] < pos[B.l];
    return A.r < B.r;
}
inline void add(int x) {
    if (!cnt[x]++) ++sm[pos[x]];
}
inline void del(int x) {
    if (!--cnt[x]) --sm[pos[x]];
}
inline int ask(int l, int r) {
    register int p = pos[l], q = pos[r], re = 0;
    if (p == q) {
        for (int i = l; i <= r; i++) re += (cnt[i] > 0);
        return re;
    }
    for (int i = l; i <= pr[p]; i++) re += (cnt[i] > 0);
    for (int i = pl[q]; i <= r; i++) re += (cnt[i] > 0);
    for (int i = p + 1; i < q; i++) re += sm[i];
    return re;
}
int main() {
    scanf("%d%d", &n, &m);
    for (int i = 1; i <= n; i++) scanf("%d", &a[i]);
    for (int i = 1; i <= m; i++) scanf("%d%d%d%d", &q[i].l, &q[i].r, &q[i].a, &q[i].b), q[i].id = i;
    Bsz = sqrt(n), Btot = (n - 1) / Bsz + 1;
    for (int i = 1; i <= n; i++) pos[i] = (i - 1) / Bsz + 1;
    for (int i = 1; i <= Btot; i++) pl[i] = pr[i - 1] + 1, pr[i] = min(n, i * Bsz);
    sort(q + 1, q + m + 1, cmp);
    L = 1, R = 0;
    for (int i = 1, ql, qr; i <= m; i++) {
        ql = q[i].l, qr = q[i].r;
        while (L < ql) del(a[L++]);
        while (L > ql) add(a[--L]);
        while (R < qr) add(a[++R]);
        while (R > qr) del(a[R--]);
        ans[q[i].id] = ask(q[i].a, q[i].b);
    }
    for (int i = 1; i <= m; i++) printf("%d\n", ans[i]);
}

[Ynoi2019] Yuno loves sqrt technology II

[Ynoi2019 模拟赛] Yuno loves sqrt technology II

先 % 出题人 lxl

求区间逆序对个数。

暴力：树状数组转移，\(O(n\sqrt n\log n)\) ，在神仙数据下只有。。20 pts

转移复杂度神仙，考虑二次离线。

剩下的就比较模板了，但是如果还用 bit 的话，大量的查询导致超时。

需要记录一个向前形成的逆序对和一个向后形成的逆序对

还记得根号平衡？用 \(O(\sqrt n)\) 插入 \(O(1)\) 查询的分块就可以了。

#include <bits/stdc++.h>
#define pb push_back
using namespace std;
typedef unsigned long long uLL;
typedef long double LD;
typedef long long LL;
typedef double db;
const int N = 100005, SQ = 325;
int n, m, a[N], b[N], Tb, pl[SQ], pr[SQ], po[N], Bsz, Btot;
int s1[SQ], s2[SQ], t1[N], t2[N];
LL pre1[N], pre2[N], ans[N], out[N];
// 1 : front
// 2 : back
struct qry { int l, r, id; } q[N];
struct mdy { int l, r, id, op, vl; } nw;
vector<mdy> d[N];
inline bool cmp(qry A, qry B) {
    if (po[A.l] ^ po[B.l]) return po[A.l] < po[B.l];
    if (po[A.l] & 1) return A.r < B.r;
    return A.r > B.r;
}
inline void clear() {
    memset(s1, 0, sizeof(s1)), memset(t1, 0, sizeof(t1));
    memset(s2, 0, sizeof(s2)), memset(t2, 0, sizeof(t2));
}
inline void add(int x) {
    register int p = po[x];
    for (int i = 1; i < p; i++) ++s1[i];
    for (int i = p + 1; i <= Btot; i++) ++s2[i];
    for (int i = pl[p]; i < x; i++) ++t1[i];
    for (int i = x + 1; i <= pr[p]; i++) ++t2[i];
}
inline int ask1(int x) { return s1[po[x]] + t1[x]; }
inline int ask2(int x) { return s2[po[x]] + t2[x]; }
int main() {
    scanf("%d%d", &n, &m);
    for (int i = 1; i <= n; i++) scanf("%d", &a[i]), b[i] = a[i];
    sort(b + 1, b + n + 1), Tb = unique(b + 1, b + n + 1) - b - 1;
    for (int i = 1; i <= n; i++) a[i] = lower_bound(b + 1, b + Tb + 1, a[i]) - b;
    Bsz = sqrt(n);
    for (int i = 1; i <= n; i++) po[i] = (i - 1) / Bsz + 1;
    Btot = po[n];
    for (int i = 1; i <= Btot; i++) pl[i] = pr[i - 1] + 1, pr[i] = i * Bsz;
    pr[Btot] = n;
    for (int i = 1; i <= m; i++) scanf("%d%d", &q[i].l, &q[i].r), q[i].id = i;
    sort(q + 1, q + m + 1, cmp);
    for (int i = 1; i <= n; i++) {
        pre1[i] = pre1[i - 1] + ask1(a[i]);
        pre2[i] = pre2[i - 1] + ask2(a[i]);
        add(a[i]);
    }
    for (int i = 1, ql, qr, L = 1, R = 0; i <= m; i++) {
        ql = q[i].l, qr = q[i].r;
        ans[i] = pre2[ql - 1] - pre2[L - 1] + pre1[qr] - pre1[R];
        if (L < ql) d[qr].pb((mdy){ L, ql - 1, i, 1,-1 });
        if (L > ql) d[qr].pb((mdy){ ql, L - 1, i, 1, 1 });
        if (R < qr) d[L - 1].pb((mdy){ R + 1, qr, i, 0,-1 });
        if (R > qr) d[L - 1].pb((mdy){ qr + 1, R, i, 0, 1 });
        L = ql, R = qr;
    }
    clear();
    for (int i = 1; i <= n; i++) {
        add(a[i]);
        for (int j = 0, Lj = d[i].size(); j < Lj; j++) {
            nw = d[i][j];
            register LL tmp = 0;
            for (int k = nw.l; k <= nw.r; k++) {
                if (nw.op) tmp += ask2(a[k]);
                else tmp += ask1(a[k]);
            }
            ans[nw.id] += tmp * nw.vl;
        }
    }
    for (int i = 1; i <= m; i++) ans[i] += ans[i - 1], out[q[i].id] = ans[i];
    for (int i = 1; i <= m; i++) printf("%lld\n", out[i]);
}

[Ynoi2015] 此时此刻的光辉

求区间内所有数乘积的约数个数 \(\pmod {19260817}\)

\(n\le 10^5,a_i\le 10^9\)

用约数个数定理计算，Pollard-Rho 分解质因数后莫队可以直接算贡献。

却发现： \(10^9\) 级别的质数个数，难以保证复杂度。

想到一个定理：一个数 \(v\) 只会有最多一个 \(\ge\sqrt v\) 的质因子。

预处理出 \(\le\sqrt{v}\) 的质数，然后求前缀和？

又发现：\(\sqrt{10^9}\) 约等于 \(31622\) ，大约有 \(3401\) 个质数，查询和预处理都是 \(O(3401n)\) ，难以接受。

扩展一下那个定理：一个数 \(v\) 最多只会有两个 \(\ge\sqrt[3]{v}\) 的质因子。

而 \(\sqrt[3]{10^9}=1000\) ，大约只有 \(168\) 个质数， \(O(168n)\) 的预处理是可行的。

剩下的两个因子，用调用一次 PR 就可以了，不要写 dfs ，否则被卡常。

将大于 \(1000\) 的因子存下来，离散化是必然的

由于一个数剩下最多两个因子，保证莫队更新的复杂度为 \(O(1)\) ， \(\le 1000\) 的因子用前缀和查询即可

卡常难度在 PR ，主要是我的 MR 是 \(O(\log^2)\) 的。。。

注意预处理逆元需要预处理到 \(2n\) ，每个数会有两个 \(\ge 1000\) 的质因子

#include <bits/stdc++.h>
using namespace std;
typedef unsigned long long uLL;
typedef long double LD;
typedef long long LL;
typedef double db;
char buf[100000], *S = buf, *T = buf;
inline char Gc() {
    return S == T && (T = (S = buf) + fread(buf, 1, 100000, stdin), S == T) ? EOF : *S++;
}
inline int Rd() {
    register int x = 0, C = Gc();
    for (; C < '0' || C > '9'; C = Gc()) ;
    for (; C > '/' && C < ':'; C = Gc()) x = (x << 1) + (x << 3) + (C ^ 48);
    return x;
}
inline int Pow(int x, int y, int p) {
    register int res = 1;
    for (; y; y >>= 1, x = 1ll * x * x % p)
        if (y & 1) res = 1ll * res * x % p;
    return res;
}
inline bool Mr(LL n, LL p) {
    if (Pow(p, n - 1, n) != 1) return 0;
    register LL q = n - 1, o;
    while (!(q & 1)) {
        q >>= 1, o = Pow(p, q, n);
        if (o != 1 && o != n - 1) return 0;
        if (o == n - 1) return 1;
    }
    return 1;
}
inline bool Prime(int n) {
    if (n < 2) return false;
    if (n == 2 || n == 3 || n == 5 || n == 7 || n == 43) return true;
    return Mr(n, 2) && Mr(n, 3) && Mr(n, 5) && Mr(n, 7) && Mr(n, 43);
}
int gcd(int a, int b) { return b ? gcd(b, a % b) : a; }
inline int Rho(int n) {
    register int p, q, x, y, z, c, g;
    for (;;) {
        x = y = rand() % n, c = rand() % n;
        p = 0, q = z = 1;
        while (++p) {
            x = (1ll * x * x % n + c) % n;
            z = 1ll * z * abs(y - x) % n;
            if (x == y || !z) break;
            if (!(p % 127) || p == q) {
                g = gcd(z, n);
                if (g > 1) return g;
                if (p == q) q <<= 1, y = x;
            }
        }
    }
}
const int P = 19260817, N = 2e5 + 5;
int n, m, id[1005], Pcnt, pr[1005], s[N][1005], a[N], b[N], Bsz, ans[N], now, inv[N], tt[N], pp[N], Pl;
struct qry { int l, r, id; } q[N];
inline bool cmp(qry A, qry B) {
    if (A.l / Bsz != B.l / Bsz) return A.l < B.l;
    return (A.l / Bsz) & 1 ? A.r > B.r : A.r < B.r;
}
inline void ad(int v) { now = 1ll * now * inv[tt[v]] % P * (tt[v] + 1) % P, tt[v]++; }
inline void dl(int v) { now = 1ll * now * inv[tt[v]] % P * (tt[v] - 1) % P, tt[v]--; }
inline void add(int p) { if (a[p]) ad(a[p]); if (b[p]) ad(b[p]); }
inline void del(int p) { if (a[p]) dl(a[p]); if (b[p]) dl(b[p]); }
int main() {
    for (int i = 1; i <= 1000; i++) id[i] = 1;
    for (int i = 2; i <= 1000; i++) {
        if (!id[i]) continue; pr[id[i] = ++Pcnt] = i;
        for (int j = i + i; j <= 1000; j += i) id[j] = 0;
    }
    inv[1] = 1;
    for (int i = 2; i <= 200000; i++)
        inv[i] = 1ll * (P - P / i) * inv[P % i] % P;
    n = Rd(), m = Rd();
    for (int i = 1, x, y; i <= n; i++) {
        x = Rd();
        memcpy(s[i], s[i - 1], sizeof(s[i]));
        for (int j = 1; j <= Pcnt && pr[j] * pr[j] <= x; j++)
            while (x % pr[j] == 0) x /= pr[j], s[i][j]++;
        if (x > 1) {
            if (x <= pr[Pcnt]) ++s[i][id[x]];
            else {
                if (!Prime(x))
                    b[i] = pp[++Pl] = y = Rho(x), x /= y;
                if (x > 1) a[i] = x, pp[++Pl] = x;
            }
        }
    }
    sort(pp + 1, pp + Pl + 1);
    Pl = unique(pp + 1, pp + Pl + 1) - pp - 1;
    for (int i = 1; i <= n; i++) {
        if (a[i]) a[i] = lower_bound(pp + 1, pp + Pl + 1, a[i]) - pp;
        if (b[i]) b[i] = lower_bound(pp + 1, pp + Pl + 1, b[i]) - pp;
    }
    Bsz = sqrt(n);
    for (int i = 1; i <= m; i++) q[i].l = Rd(), q[i].r = Rd(), q[i].id = i;
    sort(q + 1, q + m + 1, cmp);
    for (int i = 1; i <= n * 2; i++) tt[i] = 1;
    int L = 1, R = 0;
    now = 1;
    for (int i = 1, ql, qr; i <= m; i++) {
        ql = q[i].l, qr = q[i].r;
        while (L > ql) add(--L);
        while (R < qr) add(++R);
        while (L < ql) del(L++);
        while (R > qr) del(R--);
        ans[q[i].id] = now;
        for (int j = 1; j <= Pcnt; j++)
            ans[q[i].id] = 1ll * ans[q[i].id] * (s[qr][j] - s[ql - 1][j] + 1) % P;
    }
    for (int i = 1; i <= m; i++) printf("%d\n", ans[i]);
}

[Ynoi2015] 盼君勿忘

莫队只是工具，以思维难度为主。

求区间内子序列分别取重后的和 \(\pmod p\)

假设数 \(x\) 在区间 \([l,r]\) 内出现 \(k\) 次，那么包含 \(x\) 的子序列有 \(2^{r-l+1}-2^{r-l+1-k}\) 个

所以 \(x\) 的贡献就是 \(x\times (2^{r-l+1}-2^{r-l+1-k})\)

设 \(s_i\) 为出现次数为 \(i\) 的和， \(cnt_i\) 为数 \(i\) 出现次数。

双向链表维护出现次数为 \(i\) 的数的和，不同的 \(i\) 值最多 \(\sqrt n\) 个。遍历一次链表可以算出答案。

复杂度为 \(O((n+m)\sqrt n)\)

光速幂

每一个询问模数不同，快速幂？平白多处一个 \(\log\) ？预处理？直接 \(O(n)\) ？

引出黑科技：光速幂。

对于每一个询问，预处理 \(2^0,2^1,\cdots,2^{\sqrt n-1}\) 和 \(2^{\sqrt n},2^{2\sqrt n},\cdots,2^n\)

求一个 \(2^p\) ，用 \(p=k\sqrt n+l(0\le l<\sqrt n)\) 计算即可。

预处理 \(O(\sqrt n)\) ，查询 \(O(1)\)

#include <bits/stdc++.h>
#define pb push_back
using namespace std;
typedef unsigned long long uLL;
typedef long double LD;
typedef long long LL;
typedef double db;
char buf[100000], *S = buf, *T = buf;
inline char Gc() {
    return S == T && (T = (S = buf) + fread(buf, 1, 100000, stdin), S == T) ? EOF : *S++;
}
inline int rd() {
    register int x = 0;
    static char C = Gc();
    for (; C < '0' || x > '9'; C = Gc()) ;
    for (; C > '/' && C < ':'; C = Gc()) x = (x << 1) + (x << 3) + (C ^ 48);
    return x;
}
const int N = 100005;
int n, m, a[N], Bsz, Btot, po[N], L = 1, R = 0;
int pre[N], nxt[N], Ed;
LL sm[N], ans[N], cnt[N], p1[N], p2[N], v1, v2;
struct qry { int l, r, p, id; } q[N];
inline bool cmp(qry A, qry B) {
    if (po[A.l] ^ po[B.l]) return po[A.l] < po[B.l];
    if (po[A.l] & 1) return A.r < B.r;
    return A.r > B.r;
}
inline void ins(int x) {
    nxt[Ed] = x, pre[x] = Ed, Ed = x;
}
inline void dlt(int x) {
    if (x != Ed) nxt[pre[x]] = nxt[x], pre[nxt[x]] = pre[x];
    else nxt[pre[x]] = 0, Ed = pre[x];
    pre[x] = nxt[x] = 0;
}
inline int pow2(int x, int P) {
    return p2[x / Bsz] * p1[x % Bsz] % P;
}
inline void add(int x) {
    sm[cnt[x]] -= x;
    if (!sm[cnt[x]]) dlt(cnt[x]);
    ++cnt[x];
    if (!sm[cnt[x]]) ins(cnt[x]);
    sm[cnt[x]] += x;
}
inline void del(int x) {
    sm[cnt[x]] -= x;
    if (!sm[cnt[x]]) dlt(cnt[x]);
    --cnt[x];
    if (!sm[cnt[x]]) ins(cnt[x]);
    sm[cnt[x]] += x;
}
inline void init(int P) {
    p1[0] = p2[0] = 1;
    for (int i = 1; i <= Bsz; i++)
        p1[i] = 2 * p1[i - 1] % P;
    for (int i = 1; i <= Btot; i++)
        p2[i] = p2[i - 1] * p1[Bsz] % P;
}
int main() {
    n = rd(), m = rd(), Bsz = sqrt(n);
    for (int i = 1; i <= n; i++) a[i] = rd(), po[i] = (i - 1) / Bsz + 1;
    Btot = po[n];
    for (int i = 1; i <= m; i++) q[i] = (qry){ rd(), rd(), rd(), i };
    sort(q + 1, q + m + 1, cmp);
    for (int i = 1, ql, qr, P; i <= m; i++) {
        ql = q[i].l, qr = q[i].r, P = q[i].p;
        init(P);
        while (L < ql) del(a[L++]);
        while (L > ql) add(a[--L]);
        while (R < qr) add(a[++R]);
        while (R > qr) del(a[R--]);
        for (int j = nxt[0]; j; j = nxt[j]) {
            v1 = pow2(qr - ql + 1, P);
            v2 = pow2(qr - ql + 1 - j, P);
            (ans[q[i].id] += sm[j] * (v1 - v2 + P) % P) %= P;
        }
    }
    for (int i = 1; i <= m; i++) printf("%lld\n", ans[i]);
}

『MdOI R1』Path

trie 显然。

结论：对于两条不相交的路径，存在一点 \(u\) 使得一条路径在子树 \(u\) 内，一条在子树外。

转为 dfs 序，将数组复制一份，对于每一个 \(u\) ，相当于两个询问。

由于是求 max ，用只增不减的回滚莫队即可。

\(O(n\sqrt n\log w)\)

#include <bits/stdc++.h>
using namespace std;
typedef unsigned long long uLL;
typedef long double LD;
typedef long long LL;
typedef double db;
const int N = 100005;
int n, lst[N], Ecnt, xo[N], dfn[N], clk, sz[N], unit, pos[N], Btot, pr[N], tq, dis[N];
int ch[N * 30][2], tg[N * 30], Ttot, L, R, now, tmp, ans[N], out, cur, ed[N], rk[N];
struct Ed { int to, nxt, qz; } e[N << 1];
inline void Ae(int fr, int go, int vl) {
    e[++Ecnt] = (Ed){ go, lst[fr], vl }, lst[fr] = Ecnt;
}
struct qry {
    int l, r, id;
} q[N];
inline bool cmp(qry A, qry B) {
    if (pos[A.l] ^ pos[B.l]) return pos[A.l] < pos[B.l];
    return A.r < B.r;
}
void dfs(int u, int ff) {
    dfn[u] = ++clk, rk[clk] = u, sz[u] = 1;
    for (int i = lst[u], v; i; i = e[i].nxt)
        if ((v = e[i].to) ^ ff)
            dis[v] = dis[u] ^ e[i].qz, dfs(v, u), sz[u] += sz[v];
	ed[u] = clk;
}
inline void ins(int x) {
    register int u = 0;
    for (int i = 30, s; ~i; i--) {
        s = (x >> i) & 1;
        if (!ch[u][s]) ch[u][s] = ++Ttot;
        u = ch[u][s], ++tg[u];
    }
}
inline int ask(int x) {
    register int u = 0, re = 0;
    for (int i = 30, s; ~i; i--) {
        s = (x >> i) & 1;
        if (!tg[ch[u][s ^ 1]]) u = ch[u][s];
        else u = ch[u][s ^ 1], re |= 1 << i;
    }
    return re;
}
inline void cancel(int x) {
    register int u = 0;
    for (int i = 30, s; ~i; i--) {
        s = (x >> i) & 1;
        u = ch[u][s], --tg[u];
    }
}
int main() {
    scanf("%d", &n);
    for (int i = 1, u, v, w; i < n; i++) {
        scanf("%d%d%d", &u, &v, &w);
        Ae(u, v, w), Ae(v, u, w);
    }
    dfs(1, 0);
    for (int i = 2; i <= n; i++) {
        q[++tq] = (qry){ dfn[i], ed[i], i };
        q[++tq] = (qry){ ed[i] + 1, dfn[i] + n - 1, i };
    }
    for (int i = 1; i <= n; i++) xo[i] = xo[i + n] = dis[rk[i]];
    n <<= 1, unit = sqrt(n), Btot = n / unit;
    for (int i = 1; i <= n; i++) pos[i] = (i - 1) / unit + 1;
    for (int i = 1; i <= Btot; i++) pr[i] = i * unit;
    if (pr[Btot] < n) pr[++Btot] = n;
    sort(q + 1, q + tq + 1, cmp);
    // init
	L = 1, R = 0;
    for (int i = 1, ql, qr; i <= tq; i++) {
        ql = q[i].l, qr = q[i].r;
        if (cur ^ pos[ql]) {
            cur = pos[ql], now = 0;
            while (L <= R) cancel(xo[L++]);
            L = pr[cur] + 1, R = pr[cur];
        }
        if (pos[ql] == pos[qr]) {
            tmp = 0;
            for (int j = ql; j <= qr; j++) ins(xo[j]);
			for (int j = ql; j <= qr; j++) tmp = max(tmp, ask(xo[j]));
            for (int j = ql; j <= qr; j++) cancel(xo[j]);
            ans[q[i].id] += tmp;
            continue;
        }
        while (R < qr) ins(xo[++R]), now = max(now, ask(xo[R]));
        tmp = now;
        while (L > ql) ins(xo[--L]), now = max(now, ask(xo[L]));
        while (L < pr[cur] + 1) cancel(xo[L++]);
        ans[q[i].id] += now, now = tmp;
    }
    for (int i = 2; i <= n; i++) out = max(out, ans[i]);
    printf("%d\n", out);
}

posted @ 2022-08-17 14:46 小蒟蒻laf 阅读(64) 评论(0) 编辑收藏举报

会员力量，点亮园子希望

刷新页面返回顶部

小蒟蒻laf

漫漫 OI 路

莫队

莫队