LOJ3058. 「HNOI2019」白兔之舞 [DP,MTT]

LOJ

前置知识:任意长度NTT

普通NTT只能做\(2^k\)的循环卷积,尝试扩展成长度为\(n\)的循环卷积,保证模意义下\(\omega_n\)存在。

不管怎样还是要算点值。推式子:

\[\begin{align*} X_i&=\sum_{j=0}^{n-1}x_j\omega_n^{ij}\\ &=\sum_{j=0}^{n-1}x_j\omega_n^{{i+j\choose2}-{i\choose 2}-{j\choose 2}}\\ &=\omega_n^{-{i\choose 2}}\sum_{j=0}^{n-1}x_j\omega_n^{-{j\choose 2}}\times \omega_n^{{i+j\choose2}} \end{align*} \]

\(F_i=x_i\omega_n^{-{i\choose 2}},G_i=\omega_n^{{i\choose 2}}\),那么就有\(X_i=\sum_{j=0}^{n-1} F_jG_{i+j}\),就可以任意模数NTT做了。

思路

发现什么性质都观察不出来,于是直接上DP。\(dp_{i,j,x}\)表示前\(i\)层,走了\(j\)步,走到第二维是\(x\)的点(但第一维不一定是\(i\))的方案数,有转移:

\[dp_{i,j,x}=dp_{i-1,j,x}+\sum_y dp_{i-1,j-1,y}\times w_{y,x} \]

容易发现这连暴力分都没有……

发现\(n=1\)的时候,\(dp_i\)可以被看作是一个多项式,于是\(dp_L=(1+wx)^L\),可以写一个任意模数NTT。

怎么扩展?我们可以再记一维\(y\),把数组的意义改成在原有的基础上加\(i\)层、多走\(j\)步,从\(x\)走到\(y\)的方案数。你发现此时\(dp_{i,j,x,y}\)可以合并任意的\(k,i-k\)来完成转移,于是可以倍增。

倍增的时候容易发现\(j\)这一维是个循环卷积,可以MTT优化,于是获得了\(O(n^3K\log L\log K)\)的做法。

显然这是过不去的。我们发现题目保证了\(K|(mod-1)\),于是可以倍增的时候只存点值,只有在开头结尾搞个任意长度NTT,做到\(O(n^3K\log L)\)

常数巨大,被标算踩爆,但是好想。

(然而标解也不怎么难想的样子?)

代码

#include<bits/stdc++.h>
clock_t t=clock();
namespace my_std{
	using namespace std;
	#define pii pair<int,int>
	#define fir first
	#define sec second
	#define MP make_pair
	#define rep(i,x,y) for (int i=(x);i<=(y);i++)
	#define drep(i,x,y) for (int i=(x);i>=(y);i--)
	#define go(x) for (int i=head[x];i;i=edge[i].nxt)
	#define templ template<typename T>
	#define sz 273333
	typedef long long ll;
	typedef double db;
	typedef long double ld;
	mt19937 rng(chrono::steady_clock::now().time_since_epoch().count());
	templ inline T rnd(T l,T r) {return uniform_int_distribution<T>(l,r)(rng);}
	templ inline bool chkmax(T &x,T y){return x<y?x=y,1:0;}
	templ inline bool chkmin(T &x,T y){return x>y?x=y,1:0;}
	templ inline void read(T& t)
	{
		t=0;char f=0,ch=getchar();double d=0.1;
		while(ch>'9'||ch<'0') f|=(ch=='-'),ch=getchar();
		while(ch<='9'&&ch>='0') t=t*10+ch-48,ch=getchar();
		if(ch=='.'){ch=getchar();while(ch<='9'&&ch>='0') t+=d*(ch^48),d*=0.1,ch=getchar();}
		t=(f?-t:t);
	}
	template<typename T,typename... Args>inline void read(T& t,Args&... args){read(t); read(args...);}
	char __sr[1<<21],__z[20];int __C=-1,__zz=0;
	inline void Ot(){fwrite(__sr,1,__C+1,stdout),__C=-1;}
	inline void print(register int x)
	{
		if(__C>1<<20)Ot();if(x<0)__sr[++__C]='-',x=-x;
		while(__z[++__zz]=x%10+48,x/=10);
		while(__sr[++__C]=__z[__zz],--__zz);__sr[++__C]='\n';
	}
	void file()
	{
		#ifdef NTFOrz
		freopen("a.in","r",stdin);
		#endif
	}
	inline void chktime()
	{
		#ifdef NTFOrz
		cout<<(clock()-t)/1000.0<<'\n';
		#endif
	}
	ll mod;
	ll ksm(ll x,int y){ll ret=1;for (;y;y>>=1,x=x*x%mod) if (y&1) ret=ret*x%mod;return ret;}
	ll inv(ll x){return ksm(x,mod-2);}
//	inline ll mul(ll a,ll b){ll d=(ll)(a*(double)b/mod+0.5);ll ret=a*b-d*mod;if (ret<0) ret+=mod;return ret;}
}
using namespace my_std;

ll g,Wk;
vector<int>pp;
bool check(ll x){for (auto t:pp) if (ksm(x,t)==1) return 0;return 1;}
void getG(int K)
{
	pp.push_back(1);
	for (ll i=2;i*i<=mod;++i) if ((mod-1)%i==0) pp.push_back(i),pp.push_back((mod-1)/i);
	ll x;
	do x=rnd(2ll,mod-1); while (!check(x));
	g=x;Wk=ksm(g,(mod-1)/K);
}

int n,x,y,L,K;
ll w[4][4];

const ld PI=acos(-1);
struct Complex
{
	ld a,b;
	Complex(ld x=0,ld y=0){a=x,b=y;}
};
inline Complex operator + (const Complex &x,const Complex &y){return Complex(x.a+y.a,x.b+y.b);}
inline Complex operator - (const Complex &x,const Complex &y){return Complex(x.a-y.a,x.b-y.b);}
inline Complex operator * (const Complex &x,const Complex &y){return Complex(x.a*y.a-x.b*y.b,x.a*y.b+x.b*y.a);}

int r[sz],limit;
void FFT_init(int N)
{
	int l=-1;limit=1;
	while (limit<=N) limit<<=1,++l;
	rep(i,0,limit-1) r[i]=(r[i>>1]>>1)|((i&1)<<l);
}
void FFT(Complex *a,int type)
{
	rep(i,0,limit-1) if (i<r[i]) swap(a[i],a[r[i]]);
	for (int mid=1;mid<limit;mid<<=1)
	{
		Complex Wn(cos(PI/mid),type*sin(PI/mid));
		for (int len=mid<<1,j=0;j<limit;j+=len)
		{
			Complex w(1,0);
			for (int k=0;k<mid;k++,w=w*Wn)
			{
				Complex x=a[j+k],y=w*a[j+mid+k];
				a[j+k]=x+y;
				a[j+k+mid]=x-y;
			}
		}	
	}
	if (type==-1) rep(i,0,limit-1) a[i].a/=limit;
}

Complex A[sz],B[sz],C[sz],D[sz],E[sz],F[sz],G[sz],H[sz];
void MTT(ll *a,int n,ll *b,int m,ll *ret)
{
	FFT_init(n+m);
	rep(i,0,limit-1) A[i]=B[i]=C[i]=D[i]=E[i]=F[i]=G[i]=H[i]=Complex(0,0);
	rep(i,0,n-1) A[i].a=a[i]>>15,B[i].a=a[i]&0x7fff;
	rep(i,0,m-1) C[i].a=b[i]>>15,D[i].a=b[i]&0x7fff;
	FFT(A,1);FFT(B,1);FFT(C,1);FFT(D,1);
	rep(i,0,limit-1)
	{
		E[i]=A[i]*C[i];
		F[i]=A[i]*D[i];G[i]=B[i]*C[i];
		H[i]=B[i]*D[i];	
	}
	FFT(E,-1);FFT(F,-1);FFT(G,-1);FFT(H,-1);
	rep(i,0,n+m-2)
	{
		ll x1=ll(round(E[i].a))%mod,x2=ll(round(F[i].a))%mod,x3=ll(round(G[i].a))%mod,x4=ll(round(H[i].a))%mod;
		ret[i]=((x1<<30)%mod+(x2<<15)%mod+(x3<<15)%mod+x4%mod+mod)%mod;
	}
}

ll tmp1[sz],tmp2[sz],tmp3[sz];
int calc(int x){return 1ll*x*(x-1)/2%(mod-1);}
void iDFT(ll *a)
{
	ll w=inv(Wk);
	rep(i,0,K-1) tmp1[i]=a[i]*ksm(w,mod-1-calc(i))%mod;
	rep(i,0,K+K) tmp2[i]=ksm(w,calc(i));
	reverse(tmp1,tmp1+K);
	MTT(tmp1,K,tmp2,K+K,tmp3);
	rep(i,0,K-1) a[i]=ksm(w,mod-1-calc(i))*tmp3[i+K-1]%mod*inv(K)%mod;
}

ll dp[3][3][3][sz];
void merge(int C,int A,int B)
{
	rep(s,0,n-1) rep(t,0,n-1) rep(p,0,n-1) rep(i,0,K-1) (dp[C][s][t][i]+=dp[A][s][p][i]*dp[B][p][t][i]%mod)%=mod;
	rep(s,0,n-1) rep(t,0,n-1) rep(i,0,K-1) (dp[C][s][t][i]+=dp[A][s][t][i]+dp[B][s][t][i])%=mod;
}
void Clear(int p){rep(i,0,n-1) rep(j,0,n-1) rep(k,0,K-1) dp[p][i][j][k]=0;} 
void solve(int L,int p)
{
	if (L==1)
	{
		rep(i,0,n-1) rep(j,0,n-1) rep(k,0,K-1) dp[p][i][j][k]=dp[2][i][j][k];
		return;
	}
	int mid=L>>1;solve(mid,p^1);
	Clear(p);
	merge(p,p^1,p^1);
	if (L&1)
	{
		Clear(p^1);
		merge(p^1,p,2);
		rep(i,0,n-1) rep(j,0,n-1) rep(k,0,K-1) dp[p][i][j][k]=dp[p^1][i][j][k];
	} 
}

int main()
{
	file();
	read(n,K,L,x,y,mod);
	--x,--y;getG(K);
	rep(i,0,n-1) rep(j,0,n-1) read(w[i][j]);
	rep(i,0,n-1) rep(j,0,n-1) rep(k,0,K-1) dp[2][i][j][k]=w[i][j]*ksm(Wk,k)%mod;
	solve(L,0);
	iDFT(dp[0][x][y]);
	if (x==y) (++dp[0][x][x][0])%=mod;
	rep(i,0,K-1) printf("%lld\n",dp[0][x][y][i]);
	return 0;
}
posted @ 2019-11-30 22:30  p_b_p_b  阅读(287)  评论(0编辑  收藏  举报