矩阵快速幂优化dp
首先回顾一下矩阵快速幂:求一个\(n\)阶方阵\(A\)的\(k\)次方
code:
#include <iostream>
#include <cstring>
using namespace std;
typedef long long LL;
const int N = 110;
const LL MOD = 1e9 + 7;
int n;
LL k;
struct mat
{
LL m[N][N];
mat() { memset(m, 0, sizeof m); };
mat operator*(mat &B)
{
mat C;
for (int i = 1; i <= n; i++)
for (int j = 1; j <= n; j++)
for (int k = 1; k <= n; k++)
C.m[i][j] = (C.m[i][j] + (m[i][k] * B.m[k][j]) % MOD) % MOD;
return C;
}
} A, RET;
void qpow(LL k)
{
// RET 一开始为单位矩阵
for (int i = 1; i <= n; i++)
RET.m[i][i] = 1;
while (k)
{
if (k & 1)
RET = RET * A;
k >>= 1;
A = A * A;
}
}
int main()
{
cin >> n >> k;
for (int i = 1; i <= n; i++)
for (int j = 1; j <= n; j++)
cin >> A.m[i][j];
qpow(k);
for (int i = 1; i <= n; i++)
{
for (int j = 1; j <= n; j++)
{
cout << RET.m[i][j] << ' ';
}
cout << endl;
}
return 0;
}
时间复杂度:\(O(n^3logk)\)
对于矩阵快速幂这一大杀器,头脑灵活的算法小子肯定不会把它只用来求解模板题,它更多的是用来优化dp的转移,下面来总结一下常见的转移以及优化方式。
下面矩阵没有另外说明的话\(RET\)表示结果矩阵,\(A\)表示转移矩阵。
标准线性组合
形如\(dp_n=a_1dp_{n-1}+a_2dp_{n-2}+...+a_kdp_{n-k}\)
\(\mathrm{RET} = \begin{pmatrix} dp(k) & dp(k-1) & dp(k-2) & \dots & dp(1) \\ 0 & 0 & 0 & \dots & 0 \\ 0 & 0 & 0 & \dots & 0 \\ 0 & 0 & 0 & \dots & 0 \\ \vdots & \vdots & \vdots & \ddots & \vdots \\ 0 & 0 & 0 & \dots & 0 \end{pmatrix}\),\(A = \begin{pmatrix} a_1 & 1 & 0 & \dots & 0 \\ a_2 & 0 & 1 & \dots & 0 \\ a_3 & 0 & 0 & \dots & 0 \\ \vdots & \vdots & \vdots & \ddots & \vdots \\ a_{k-1} & 0 & 0 & \dots & 1 \\ a_k & 0 & 0 & \dots & 0 \end{pmatrix}\)
若需要求的是\(dp_n\),则\(RET*A^{n-k}\),\(RET_{1,1}\)即是答案.
\(f_n=pf_{n-1}+qf_{n-2}\),这是一个\(n=2\)阶的矩阵转移式,构造2阶方阵即可。
\(RET=\begin{pmatrix} f_2 & f_1\\ 0&0 \end{pmatrix}\),\(A=\begin{pmatrix} p & 1\\ q & 0 \end{pmatrix}\).
code:
#include <iostream>
#include <cstring>
using namespace std;
typedef long long LL;
const int N = 5;
LL a_1, a_2, p, q, k, MOD;
int n = 2;
struct mat
{
LL m[N][N];
mat(){memset(m, 0, sizeof m);};
mat operator*(mat& B)
{
mat C;
for(int i = 1; i <= n; i++)
for(int j = 1; j <= n; j++)
for(int k = 1; k <= n; k++)
C.m[i][j] = (C.m[i][j] + (m[i][k] * B.m[k][j]) % MOD) % MOD;
return C;
}
}RET, A;
void qpow(LL k)
{
RET.m[1][1] = a_2 % MOD, RET.m[1][2] = a_1 % MOD;
A.m[1][1] = p % MOD, A.m[1][2] = 1, A.m[2][1] = q % MOD;
if(k <= 0) return;
while(k)
{
if(k & 1) RET = RET * A;
k >>= 1;
A = A * A;
}
}
int main()
{
cin >> p >> q >> a_1 >> a_2 >> k >> MOD;
qpow(k - 2);
if(k == 1) cout << RET.m[1][2] << endl;
else if(k == 2) cout << RET.m[1][1] << endl;
else cout << RET.m[1][1] << endl;
return 0;
}
\(f_n=f_{n-1}+0*f_{n-2}+f_{n-3}\),这是一个\(n=3\)阶的矩阵转移式,构造3阶方阵即可。
\(RET=\begin{pmatrix} f_3 & f_2 & f_1\\ 0 & 0 & 0\\ 0&0&0 \end{pmatrix}\),\(A=\begin{pmatrix} 1 & 1 & 0\\ 0 & 0 & 1 \\ 1 & 0 & 0 \end{pmatrix}\)
code:
#include <iostream>
#include <vector>
#include <unordered_map>
#include <map>
#include <unordered_set>
#include <set>
#include <algorithm>
#include <cmath>
#include <string>
#include <cstring>
#include <queue>
#include <cstring>
using namespace std;
#define endl '\n'
typedef long long LL;
typedef pair<int, int> PII;
#define lc p << 1
#define rc p << 1 | 1
#define lowbit(x) (x & -x)
const int N = 4;
const LL MOD = 1e9 + 7;
const double ln2 = log(2);
const double rec_ln2 = 1.0 / ln2;
LL n;
struct mat
{
LL m[N][N];
mat(){memset(m, 0, sizeof m);}
mat operator* (mat& B)
{
mat C;
for(int i = 1; i <= 3; i++)
for(int j = 1; j <= 3; j++)
for(int k = 1; k <= 3; k++)
C.m[i][j] = (C.m[i][j] + m[i][k] * B.m[k][j]) % MOD;
return C;
}
}A, RET;
void qpow(LL b)
{
RET.m[1][1] = RET.m[1][2] = RET.m[1][3] = 1;
A.m[1][1] = A.m[1][2] = A.m[2][3] = A.m[3][1] = 1;
// RET.m[1][3] = RET.m[2][1] = RET.m[2][2] = RET.m[3][2] = RET.m[3][3] = 0;
while(b)
{
if(b & 1) RET = RET * A;
A = A * A;
b >>= 1;
}
}
void solve()
{
memset(RET.m, 0, sizeof RET.m);
memset(A.m, 0, sizeof A.m);
cin >> n;
if(n == 1 || n == 2 || n == 3)
{
cout << 1 << endl;
return;
}
qpow(n - 3);
cout << RET.m[1][1] << endl;
}
int main()
{
cin.tie(0), cout.tie(0), ios::sync_with_stdio(false);
int T = 1;
cin >> T;
while(T--)
{
solve();
}
return 0;
}
带常数项的线性递推
形如:\(dp_n=a_1dp_{n-1}+a_2dp_{n-2}+...+a_kdp_{n-k}+c\)
解决方式:构造\(k+1\)阶方阵即可。
\(\mathrm{RET} = \begin{pmatrix} dp(k) & dp(k-1) & dp(k-2) & \dots & dp(1) & 1\\ 0 & 0 & 0 & \dots & 0 & 0 \\ 0 & 0 & 0 & \dots & 0 & 0\\ 0 & 0 & 0 & \dots & 0 & 0\\ \vdots & \vdots & \vdots & \ddots & \vdots \\ 0 & 0 & 0 & \dots & 0 & 0 \end{pmatrix}_{k+1}\),\(A = \begin{pmatrix} a_1 & 1 & 0 & \dots & 0 & 0\\ a_2 & 0 & 1 & \dots & 0 & 0\\ a_3 & 0 & 0 & \dots & 0 & 0\\ \vdots & \vdots & \vdots & \ddots & \vdots \\ a_{k-1} & 0 & 0 & \dots & 1 & 0 \\ a_k & 0 & 0 & \dots & 0 & 0\\ c & 0 & 0 & \dots & 0 & 1 \end{pmatrix}_{k+1}\)
若需要求的是\(dp_n\),则\(RET*A^{n-k+1}\),\(RET_{1,1}\)即是答案.
\(X_{n}=(aX_{n}+c)\%m\),对\(m\)取模前面其实也有,只是没有写到转移方程里面,因为取模的过程是在矩阵乘法的时候计算的,把\(\%m\)去掉这题就很明了了。
\(RET=\begin{pmatrix} X_0 & 1\\ 0 & 0 \end{pmatrix}\),\(A=\begin{pmatrix} a & 0\\ c & 1 \end{pmatrix}\),唯一需要注意的是,这一题的数据会爆LL,用__int128或者龟速乘都可以。
code:
#include <iostream>
#include <cstring>
using namespace std;
const int N = 3;
typedef __int128_t LL;
int n = 2;
LL MOD, a, c, x_0, k, g;
inline LL read()
{
char c=getchar();
LL x=0,f=1;
while(c<48){if(c=='-')f=-1;c=getchar();}
while(c>47)x=(x*10)+(c^48),c=getchar();
return x*f;
}
inline void mwrite(LL a)
{
if(a>9)mwrite(a/10);
putchar((a%10)|48);
}
inline void write(LL a,char c)
{
mwrite(a<0?(putchar('-'),a=-a):a);
putchar(c);
}
struct mat
{
LL m[N][N];
mat() { memset(m, 0, sizeof m); };
mat operator*(mat &B)
{
mat C;
for (int i = 1; i <= n; i++)
for (int j = 1; j <= n; j++)
for (int k = 1; k <= n; k++)
C.m[i][j] = (C.m[i][j] + (m[i][k] * B.m[k][j]) % MOD) % MOD;
return C;
}
} RET, A;
void qpow(LL k)
{
RET.m[1][1] = x_0, RET.m[1][2] = 1;
A.m[1][1] = a, A.m[2][1] = c, A.m[2][2] = 1;
while (k)
{
if (k & 1)
RET = RET * A;
A = A * A;
k >>= 1;
}
}
int main()
{
MOD = read(); a = read(); c = read(); x_0 = read(); k = read(); g = read();
qpow(k);
write(RET.m[1][1] % g, ' ');
return 0;
}
带多项式的递推
形如:\(f_n=a_1f_{n-1}+a_2f_{n-2}+...+a_kf_{1} + g(n)+t(n)+...\)
即从原来的转移方程的基础上多了几项别的递推式,这些递推式或许和\(n \),\(f_n\)本身有关,亦或者是一项单独的线性递推式,对于这一类问题,没有固定的转移矩阵的形式,但是有一些既定的事实就是最终的转移矩阵是由各项的子矩阵构成的,面对这一类的问题,需要耐心化简题目给定的递推式,如果转移式里面有双线性项,将里面的双线性项化成单线性项,这个过程一般是由单线性项的子矩阵堆砌出来双线性项的转移矩阵,总的来说比前面的要复杂一点,但都仅仅是矩阵快速幂本身的另一种形式,转移方程比较容易发现,而且转移矩阵不涉及到广义矩阵。
对于前面的题目,他们的转移仅仅是依赖一个常数或者前面的若干项,对于前\(n \)项 fib 的和\(S_n=S_{n-1}+f_n\),它依赖的不但是\(S\)函数本身,还需要依赖\( f\)函数,这也就意味着我们在构造转移矩阵的时候需要统计兼顾\(S\)和\(f\)两者,但这其实很明了,\(f\)的转移在\(S\)中最作为子级存在,换句话说\(S\)的转移矩阵中需要包含\(f\)转移矩阵这个子矩阵.
\(RET_1\)表示\(S\)的结果,\(A_1\)表示\(S\)的转移矩阵
\(RET_1=\begin{pmatrix} S_1 & f_2 & f_1 \\ 0 & 0 & 0 \\ 0 & 0 & 0 \end{pmatrix}\),\(A_1=\begin{pmatrix} 1 & 0 & 0\\ 1 & 1 & 1\\ 0 & 1 & 0 \end{pmatrix}\),不难发现,\(A1\)右下角的 2 阶子矩阵就是\(f\)的转移矩阵,下面写一下针对于\(S\)的矩阵快速幂转移。
#include <iostream>
#include <cstring>
using namespace std;
const int N = 4;
typedef long long LL;
const int MOD = 1e9 + 7;
int n = 3;
LL k;
struct mat
{
LL m[N][N];
mat() { memset(m, 0, sizeof m); };
mat operator*(mat &B)
{
mat C;
for (int i = 1; i <= n; i++)
for (int j = 1; j <= n; j++)
for (int k = 1; k <= n; k++)
C.m[i][j] = (C.m[i][j] + (m[i][k] * B.m[k][j]) % MOD) % MOD;
return C;
}
}RET, A;
void qpow(LL k)
{
RET.m[1][1] = 1, RET.m[1][2] = 1, RET.m[1][3] = 1;
A.m[1][1] = 1, A.m[2][1] = 1, A.m[2][2] = 1, A.m[2][3] = 1, A.m[3][2] = 1;
if(k <= 0) return;
while(k)
{
if(k & 1) RET = RET * A;
k >>= 1;
A = A * A;
}
}
int main()
{
cin >> k;
qpow(k - 1);
cout << RET.m[1][1] << endl;
return 0;
}
有了上面的经验,下面来探讨一下对于\(T_n\)的转移方式,\(T_n=T_{n-1}+nf_n\),\(nf_n\)是一个双线性项没法直接进行转移,可以将其拆解成\((n-1)f_{n-1}+(n-1)f_{n-2}+f_{n-1}+f_{n-2}\),令\(C_n=nf_{n}\),\(D_n=nf_{n-1}\),\(A_n=f_n\),\(B_n=f_{n-1}\)
- \(A_{n+1}=A_n+B_n\)
- \(B_{n+1}=A_n\)
- \(C_{n+1}=C_n+D_n+A_n+B_n\)
- \(D_{n+1}=A_n+C_n\)
- \(T_{n+1}=T_n+A_n+B_n+C_n+D_n\)
- \(\begin{pmatrix} A_n & B_n & C_n & D_n & T_n \end{pmatrix}\)
令\(RET\)表示\(T_n\)的结果矩阵,\(A\)表示转移矩阵,则
\(RET=\begin{pmatrix} f_2 & f_1 & 2f_2 & 2f_1 & f_1+2f_2\\ 0 & 0 & 0 & 0 & 0\\ 0 & 0 & 0 & 0 & 0\\ 0 & 0 & 0 & 0 & 0\\ 0 & 0 & 0 & 0 & 0 \end{pmatrix}\),\(A=\begin{pmatrix} 1 & 1 & 1 & 1 & 1\\ 1 & 0 & 1 & 0 & 1\\ 0 & 0 & 1 & 1 & 1\\ 0 & 0 & 1 & 0 & 1\\ 0 & 0 & 0 & 0 & 1\\ \end{pmatrix}\)
\(T_k=(RET*A^{k-1})_{1,5}\)
code:
#include <iostream>
#include <cstring>
using namespace std;
const int N = 6;
typedef long long LL;
int n = 5;
LL k, MOD;
struct mat
{
LL m[N][N];
mat() { memset(m, 0, sizeof m); };
void Debug()
{
for(int i = 1; i <= n; i++)
{
for(int j = 1; j <= n; j++)
{
cout << m[i][j] << ' ';
}
cout << endl;
}
}
mat operator*(mat &B)
{
mat C;
for (int i = 1; i <= n; i++)
for (int j = 1; j <= n; j++)
for (int k = 1; k <= n; k++)
C.m[i][j] = (C.m[i][j] + (m[i][k] * B.m[k][j]) % MOD) % MOD;
return C;
}
}RET, A;
void qpow(LL k)
{
RET.m[1][1] = 1, RET.m[1][2] = 1, RET.m[1][3] = 2, RET.m[1][4] = 2, RET.m[1][5] = 3;
A.m[1][1] = 1, A.m[1][2] = 1, A.m[1][3] = 1, A.m[1][4] = 1, A.m[1][5] = 1;
A.m[2][1] = 1, A.m[2][3] = 1, A.m[2][5] = 1;
A.m[3][3] = 1, A.m[3][4] = 1, A.m[3][5] = 1;
A.m[4][3] = 1, A.m[4][5] = 1;
A.m[5][5] = 1;
if(k <= 0) return;
while(k)
{
if(k & 1) RET = RET * A;
k >>= 1;
A = A * A;
}
}
int main()
{
cin >> k >> MOD;
if(k == 1)
{
cout << 1 % MOD << endl;
return 0;
}
qpow(k - 2);
cout << RET.m[1][5] << endl;
return 0;
}
\(f_1=a\),\(f_2=b\),\(f_n=f_{n-1}+2f_{n-2}+n^4\).
- \(f_n=f_{n-1}+2f_{n-2}\)是标准的线性组合
- \((n+1)^4=n^4+4n^3+6n^2+4n+1\)
- \((n+1)^3=n^3+3n^3+3n+1\)
- \((n+1)^2=n^2+2n+1\)
- \((n+1)=n+1\)
- \(\begin{pmatrix} f_{n-1} & f_{n-2} & n^4 & n^3 & n^2 & n & 1 \end{pmatrix}\)
- \(f_n=f_{n-1}+2f_{n-2}\)
\(A=\begin{pmatrix} 1 & 1 & 0 & 0 & 0 & 0 & 0\\ 2 & 0 & 0 & 0 & 0 & 0 & 0\\ 0 & 0 & 1 & 0 & 0 & 0 & 0\\ 0 & 0 & 4 & 1 & 0 & 0 & 0\\ 0 & 0 & 6 & 3 & 1 & 0 & 0\\ 0 & 0 & 4 & 3 & 2 & 1 & 0\\ 0 & 0 & 1 & 1 & 1 & 1 & 1 \end{pmatrix}_{7*7}\),
\(RET=\begin{pmatrix} f_{n-1} & f_{n-2} & n^4 & n^3 & n^2 & n & 1\\ \vdots & \vdots & \vdots & \vdots & \vdots & \vdots & \vdots\\ 0 & 0 & 0 & 0 & 0 & 0 & 0 \end{pmatrix}_{7*7}\)
code:
这一题其实比上一题要简单不少,对这种多项式的递推将阶不断往下降直到到单一线性阶即可,然后就堆积转移矩阵即可。
图上的路径类 DP
状态压缩类的批量转移
广义矩阵快速幂
P2886 [USACO07NOV] Cow Relays G
浙公网安备 33010602011771号