NOI 2025 见!

 

那些算法的空间优化

猫树分治

例题:[ABC426G] Range Knapsack Query

朴素实现:

#include <iostream>
#include <vector>
#include <cstring>
using namespace std;

// 64-bit integer type used for DP values and answers.
using ll = long long;

// Array bounds; main() indexes items and queries from 1, hence the "+ 1".
// NOTE(review): presumably these mirror the problem's limits on item count,
// capacity and query count -- confirm against the statement.
constexpr int N = 20000 + 1;
constexpr int K = 500 + 1;
constexpr int Q = 200000 + 1;

// Per-item weight and value, read as a pair for each item in main().
int w[N];
int v[N];

// One query: item index range [l, r] and capacity c.
struct {
    int l;
    int r;
    int c;
} que[Q];

// f[i][j]: knapsack DP row for item i at capacity j; rows are rebuilt for
// every divide-and-conquer node, relative to that node's midpoint.
ll f[N][K];

// ans[i]: answer to query i, printed in input order.
ll ans[Q];

int main() {
    ios::sync_with_stdio(false);
    cin.tie(nullptr);

    int n;
    cin >> n;
    for (int i = 1; i <= n; ++i) {
        cin >> w[i] >> v[i];
    }
    int q;
    cin >> q;
    for (int i = 1; i <= q; ++i) {
        auto &[l, r, c] = que[i];
        cin >> l >> r >> c;
    }

    auto dfs = [&](auto &&self, int l, int r, const vector<int> &qid) {
        if (l == r) {
            for (int i : qid) {
                ans[i] = que[i].c >= w[r] ? v[r] : 0;
            }
            return ;
        }

        int mid = (l + r) >> 1;
        memset(f[mid], 0, sizeof(f[mid]));
        for (int i = w[mid]; i < K; ++i) {
            f[mid][i] = v[mid];
        }
        for (int i = mid - 1; i >= l; --i) {
            memcpy(f[i], f[i + 1], sizeof(f[i]));
            for (int j = K - 1; j >= w[i]; --j) {
                f[i][j] = max(f[i][j], f[i][j - w[i]] + v[i]);
            }
        }
        memset(f[mid + 1], 0, sizeof(f[mid + 1]));
        for (int i = w[mid + 1]; i < K; ++i) {
            f[mid + 1][i] = v[mid + 1];
        }
        for (int i = mid + 2; i <= r; ++i) {
            memcpy(f[i], f[i - 1], sizeof(f[i]));
            for (int j = K - 1; j >= w[i]; --j) {
                f[i][j] = max(f[i][j], f[i][j - w[i]] + v[i]);
            }
        }
        vector<int> lq, rq;
        for (int i : qid) {
            auto [ql, qr, qc] = que[i];
            if (qr <= mid) {
                lq.push_back(i);
            } else if (mid < ql) {
                rq.push_back(i);
            } else {
                for (int j = 0; j <= qc; ++j) {
                    ans[i] = max(ans[i], f[ql][j] + f[qr][qc - j]);
                }
            }
        }
        if (!lq.empty()) {
            self(self, l, mid, lq);
        }
        if (!rq.empty()) {
            self(self, mid + 1, r, rq);
        }
    };
    vector<int> qid(q);
    for (int i = 0; i < q; ++i) {
        qid[i] = i + 1;
    }
    dfs(dfs, 1, n, qid);
    for (int i = 1; i <= q; ++i) {
        cout << ans[i] << '\n';
    }
    return 0;
}

在最坏构造下,如所有询问的 \(l_i = r_i\),则每个询问都会在分治树的每一层的其中一个 vector 出现恰好一次,所以这部分的空间复杂度是 \(O(q \log n)\),有时会成为瓶颈。

优化成 \(\Theta(q)\) 的方法比较自然,使每个分治结点共用同一个询问编号序列即可。具体地,每个分治结点额外记录 \((\text{le}, \text{ri})\),表示该子树需要处理的询问在编号序列中的下标区间为 \([\text{le}, \text{ri}]\)。递归下去前将左子树的询问在询问序列上从 \(\text{le}\) 向右重写,将右子树的询问在询问序列上从 \(\text{ri}\) 向左重写即可。

猫树分治部分实现:

auto dfs = [&](auto &&self, int l, int r, int le, int ri) {
    if (l == r) {
        for (int i = le; i <= ri; ++i) {
            ans[qid[i]] = que[qid[i]].c >= w[r] ? v[r] : 0;
        }
        return ;
    }

    int mid = (l + r) >> 1;
    memset(f[mid], 0, sizeof(f[mid]));
    for (int i = w[mid]; i < K; ++i) {
        f[mid][i] = v[mid];
    }
    for (int i = mid - 1; i >= l; --i) {
        memcpy(f[i], f[i + 1], sizeof(f[i]));
        for (int j = K - 1; j >= w[i]; --j) {
            f[i][j] = max(f[i][j], f[i][j - w[i]] + v[i]);
        }
    }
    memset(f[mid + 1], 0, sizeof(f[mid + 1]));
    for (int i = w[mid + 1]; i < K; ++i) {
        f[mid + 1][i] = v[mid + 1];
    }
    for (int i = mid + 2; i <= r; ++i) {
        memcpy(f[i], f[i - 1], sizeof(f[i]));
        for (int j = K - 1; j >= w[i]; --j) {
            f[i][j] = max(f[i][j], f[i][j - w[i]] + v[i]);
        }
    }
    int lp = le - 1, rp = ri + 1;
    for (int i = le; i <= ri; ++i) {
        auto [ql, qr, qc] = que[qid[i]];
        if (qr <= mid) {
            buf[++lp] = qid[i];
        } else if (mid < ql) {
            buf[--rp] = qid[i];
        } else {
            for (int j = 0; j <= qc; ++j) {
                ans[qid[i]] = max(ans[qid[i]], f[ql][j] + f[qr][qc - j]);
            }
        }
    }
    if (le <= lp) {
        memcpy(qid + le, buf + le, sizeof(int) * (lp - le + 1));
        self(self, l, mid, le, lp);
    }
    if (rp <= ri) {
        memcpy(qid + rp, buf + rp, sizeof(int) * (ri - rp + 1));
        self(self, mid + 1, r, rp, ri);
    }
};
iota(qid + 1, qid + q + 1, 1);
dfs(dfs, 1, n, 1, q);

优化前:https://atcoder.jp/contests/abc426/submissions/71565759

优化后:https://atcoder.jp/contests/abc426/submissions/71565702

时空的区别不明显是因为:

  1. 本题的 \(q \log n\) 太小
  2. 没认真卡,递归层数较浅(存疑)

posted on 2025-12-08 12:32  SkyWave2022  阅读(31)  评论(0)    收藏  举报

导航