那些算法的空间优化
猫树分治
例题:[ABC426G] Range Knapsack Query
朴素实现:
#include <iostream>
#include <vector>
#include <cstring>
using namespace std;
using ll = long long;
// Array capacities. Each is the largest index used by the program plus one,
// because items and queries are stored starting at index 1.
constexpr int N = 20001;   // item slots
constexpr int K = 501;     // knapsack capacities 0..500
constexpr int Q = 200001;  // query slots

int w[N];  // w[i]: weight of item i (compared against a query's capacity)
int v[N];  // v[i]: value of item i (added to the answer when it is taken)

// One query: the three integers read from input, in the order l, r, c.
// The member order matters: the solver unpacks it with structured bindings.
struct {
    int l;
    int r;
    int c;
} que[Q];

ll f[N][K];  // f[i][j]: knapsack table row stored for item position i, capacity j
ll ans[Q];   // ans[i]: answer printed for query i
int main() {
ios::sync_with_stdio(false);
cin.tie(nullptr);
int n;
cin >> n;
for (int i = 1; i <= n; ++i) {
cin >> w[i] >> v[i];
}
int q;
cin >> q;
for (int i = 1; i <= q; ++i) {
auto &[l, r, c] = que[i];
cin >> l >> r >> c;
}
auto dfs = [&](auto &&self, int l, int r, const vector<int> &qid) {
if (l == r) {
for (int i : qid) {
ans[i] = que[i].c >= w[r] ? v[r] : 0;
}
return ;
}
int mid = (l + r) >> 1;
memset(f[mid], 0, sizeof(f[mid]));
for (int i = w[mid]; i < K; ++i) {
f[mid][i] = v[mid];
}
for (int i = mid - 1; i >= l; --i) {
memcpy(f[i], f[i + 1], sizeof(f[i]));
for (int j = K - 1; j >= w[i]; --j) {
f[i][j] = max(f[i][j], f[i][j - w[i]] + v[i]);
}
}
memset(f[mid + 1], 0, sizeof(f[mid + 1]));
for (int i = w[mid + 1]; i < K; ++i) {
f[mid + 1][i] = v[mid + 1];
}
for (int i = mid + 2; i <= r; ++i) {
memcpy(f[i], f[i - 1], sizeof(f[i]));
for (int j = K - 1; j >= w[i]; --j) {
f[i][j] = max(f[i][j], f[i][j - w[i]] + v[i]);
}
}
vector<int> lq, rq;
for (int i : qid) {
auto [ql, qr, qc] = que[i];
if (qr <= mid) {
lq.push_back(i);
} else if (mid < ql) {
rq.push_back(i);
} else {
for (int j = 0; j <= qc; ++j) {
ans[i] = max(ans[i], f[ql][j] + f[qr][qc - j]);
}
}
}
if (!lq.empty()) {
self(self, l, mid, lq);
}
if (!rq.empty()) {
self(self, mid + 1, r, rq);
}
};
vector<int> qid(q);
for (int i = 0; i < q; ++i) {
qid[i] = i + 1;
}
dfs(dfs, 1, n, qid);
for (int i = 1; i <= q; ++i) {
cout << ans[i] << '\n';
}
return 0;
}
在最坏构造下,如所有询问的 \(l_i = r_i\),则每个询问都会在分治树的每一层的其中一个 vector 出现恰好一次,所以这部分的空间复杂度是 \(O(q \log n)\),有时会成为瓶颈。
优化成 \(\Theta(q)\) 的方法比较自然,使每个分治结点共用同一个询问编号序列即可。具体地,每个分治结点记录的额外信息从一个 vector 变为二元组 \((\text{le}, \text{ri})\),表示该子树需要处理的询问在编号序列中的下标区间为 \([\text{le}, \text{ri}]\)。递归下去前,将左子树的询问在询问序列上从 \(\text{le}\) 向右重写,将右子树的询问在询问序列上从 \(\text{ri}\) 向左重写即可。
猫树分治部分实现:
auto dfs = [&](auto &&self, int l, int r, int le, int ri) {
if (l == r) {
for (int i = le; i <= ri; ++i) {
ans[qid[i]] = que[qid[i]].c >= w[r] ? v[r] : 0;
}
return ;
}
int mid = (l + r) >> 1;
memset(f[mid], 0, sizeof(f[mid]));
for (int i = w[mid]; i < K; ++i) {
f[mid][i] = v[mid];
}
for (int i = mid - 1; i >= l; --i) {
memcpy(f[i], f[i + 1], sizeof(f[i]));
for (int j = K - 1; j >= w[i]; --j) {
f[i][j] = max(f[i][j], f[i][j - w[i]] + v[i]);
}
}
memset(f[mid + 1], 0, sizeof(f[mid + 1]));
for (int i = w[mid + 1]; i < K; ++i) {
f[mid + 1][i] = v[mid + 1];
}
for (int i = mid + 2; i <= r; ++i) {
memcpy(f[i], f[i - 1], sizeof(f[i]));
for (int j = K - 1; j >= w[i]; --j) {
f[i][j] = max(f[i][j], f[i][j - w[i]] + v[i]);
}
}
int lp = le - 1, rp = ri + 1;
for (int i = le; i <= ri; ++i) {
auto [ql, qr, qc] = que[qid[i]];
if (qr <= mid) {
buf[++lp] = qid[i];
} else if (mid < ql) {
buf[--rp] = qid[i];
} else {
for (int j = 0; j <= qc; ++j) {
ans[qid[i]] = max(ans[qid[i]], f[ql][j] + f[qr][qc - j]);
}
}
}
if (le <= lp) {
memcpy(qid + le, buf + le, sizeof(int) * (lp - le + 1));
self(self, l, mid, le, lp);
}
if (rp <= ri) {
memcpy(qid + rp, buf + rp, sizeof(int) * (ri - rp + 1));
self(self, mid + 1, r, rp, ri);
}
};
iota(qid + 1, qid + q + 1, 1);
dfs(dfs, 1, n, 1, q);
优化前:https://atcoder.jp/contests/abc426/submissions/71565759
优化后:https://atcoder.jp/contests/abc426/submissions/71565702
时空的区别不明显是因为:
- 本题的 \(q \log n\) 太小
- 没认真卡,递归层数较浅(存疑)
posted on 2025-12-08 12:32 SkyWave2022 阅读(31) 评论(0) 收藏 举报
浙公网安备 33010602011771号