题解 HDU5728 【PowMod】

  1. 不保证能 AC 的做法

前置芝士:欧拉函数、莫比乌斯反演、线性求逆元、Dirichlet 前 & 后缀和、扩展欧拉定理(exEuler)、卡常

关于欧拉函数,有结论:\(\varphi(ij) = \frac{\varphi(i) \varphi(j) \gcd(i, j)}{\varphi(\gcd(i, j))}\),证明略。

题目所求的 \(k = \displaystyle\sum_{i = 1}^m \varphi(in)\),代入上述结论得:

\(k = \varphi(n) \displaystyle\sum_{i = 1}^m \frac{\varphi(i) \gcd(i, n)}{\varphi(\gcd(i, n))}\)

\(= \varphi(n) \displaystyle\sum_{d\ |\ n} \frac{d}{\varphi(d)} \sum_{i = 1}^m [\gcd(i, n) = d] \varphi(i)\)

\(= \varphi(n) \displaystyle\sum_{d\ |\ n} \frac{d}{\varphi(d)} \sum_{i = 1}^{\lfloor \frac{m}{d} \rfloor} [\gcd(i, \frac{n}{d}) = 1] \varphi(id)\)

\(= \varphi(n) \displaystyle\sum_{d\ |\ n} \frac{d}{\varphi(d)} \sum_{q\ |\ \frac{n}{d}} \mu(q) \sum_{i = 1}^{\lfloor \frac{m}{dq} \rfloor} \varphi(idq)\)

令 \(T = dq\),有:

\(k = \varphi(n) \displaystyle\sum_{T\ |\ n} (\sum_{d\ |\ T} \frac{d}{\varphi(d)} \mu(\frac{T}{d})) \sum_{i = 1}^{\lfloor \frac{m}{T} \rfloor} \varphi(iT)\)

显然,筛出欧拉函数后,\(f(T) = \displaystyle\sum_{d\ |\ T} \frac{d}{\varphi(d)} \mu(\frac{T}{d})\) 可以通过倒推 Dirichlet 前缀和在 \(O(N \ln \ln N)\) 的时间复杂度内预处理出。

但是……剩下的部分怎么办呢?如果不预处理,每次在线计算还是会 TLE。

那就预处理嘛。设 \(g(n, m) = \displaystyle\sum_{i = 1}^m \varphi(in)\),显然有转移方程 \(g(n, m) = g(n, m - 1) + \varphi(nm)\)。

但预处理所有 \(g(n, m)\) 显然是不现实的,所以要选定阈值 \(K\),对于 \(K\) 以内所有数 \(n\) 预处理出 \(g(n, m)\) 的值。

注意到题目中 \(n\) 为无平方因数的数,所以只需要预处理 \(K\) 以内所有的无平方因数的数 \(n\) 即可。我的代码里取 \(K = 17\)(差不多卡在空间限制左右)。

最后剩下的无限幂塔部分显然可以用扩展欧拉定理快速计算。

但这样还是会 TLE。怎么办呢?卡常。register、inline、取模优化、火车头、指令集都加上,然后你就可以愉快地 AC 了。当然,我也不能保证这份代码时时刻刻都能 AC,因为在第一次 AC 后再怎么提交也无法 AC 了。建议选择吃饭或夜深人静的时候提交。

时间复杂度不会算(

丑陋的代码:

#pragma GCC optimize("Ofast")
#pragma GCC optimize("inline")
#pragma GCC optimize("-fgcse")
#pragma GCC optimize("-fgcse-lm")
#pragma GCC optimize("-fipa-sra")
#pragma GCC optimize("-ftree-pre")
#pragma GCC optimize("-ftree-vrp")
#pragma GCC optimize("-fpeephole2")
#pragma GCC optimize("-ffast-math")
#pragma GCC optimize("-fsched-spec")
#pragma GCC optimize("unroll-loops")
#pragma GCC optimize("-falign-jumps")
#pragma GCC optimize("-falign-loops")
#pragma GCC optimize("-falign-labels")
#pragma GCC optimize("-fdevirtualize")
#pragma GCC optimize("-fcaller-saves")
#pragma GCC optimize("-fcrossjumping")
#pragma GCC optimize("-fthread-jumps")
#pragma GCC optimize("-funroll-loops")
#pragma GCC optimize("-freorder-blocks")
#pragma GCC optimize("-fschedule-insns")
#pragma GCC optimize("inline-functions")
#pragma GCC optimize("-ftree-tail-merge")
#pragma GCC optimize("-fschedule-insns2")
#pragma GCC optimize("-fstrict-aliasing")
#pragma GCC optimize("-falign-functions")
#pragma GCC optimize("-fcse-follow-jumps")
#pragma GCC optimize("-fsched-interblock")
#pragma GCC optimize("-fpartial-inlining")
#pragma GCC optimize("no-stack-protector")
#pragma GCC optimize("-freorder-functions")
#pragma GCC optimize("-findirect-inlining")
#pragma GCC optimize("-fhoist-adjacent-loads")
#pragma GCC optimize("-frerun-cse-after-loop")
#pragma GCC optimize("inline-small-functions")
#pragma GCC optimize("-finline-small-functions")
#pragma GCC optimize("-ftree-switch-conversion")
#pragma GCC optimize("-foptimize-sibling-calls")
#pragma GCC optimize("-fexpensive-optimizations")
#pragma GCC optimize("inline-functions-called-once")
#pragma GCC optimize("-fdelete-null-pointer-checks")

#pragma GCC target("abm")
#pragma GCC target("avx")
#pragma GCC target("f16c")
#pragma GCC target("mmx")
#pragma GCC target("popcnt")
#pragma GCC target("sse")
#pragma GCC target("sse2")
#pragma GCC target("sse3")
#pragma GCC target("sse4")
#pragma GCC target("sse4.1")
#pragma GCC target("sse4.2")
#pragma GCC target("ssse3")
#pragma GCC target("tune=native")

#include <stdio.h>
#include <math.h>

// Compile-time limits:
//   N   - size of the sieve tables (valid indices are 0 .. N-1),
//   M   - number of entries of number[] that init() reads (indices 1 .. M),
//   K   - largest step for which g_list[] is precomputed,
//   mod - modulus used for the sum k and for the tables inv[], f[] and g_list[].
const int N = 1e7 + 1, M = 12, K = 17, mod = 1e9 + 7;
// Upper summation bound of the current query; written by main() through scanf, read by g().
int m;
// prime[1..cnt] - primes found by the sieve in init() (slot 0 unused);
// phi[x]        - Euler's totient of x;
// inv[x]        - modular inverse of x modulo `mod`;
// f[T]          - sum over d | T of (d / phi(d)) * mu(T / d), stored modulo `mod`;
// number[1..M]  - the square-free integers not exceeding K (slot 0 is a placeholder);
// g_list[s][j]  - (phi[s] + phi[2s] + ... + phi[j*s]) modulo `mod`, filled only for s listed in number[].
int prime[N], phi[N], inv[N], f[N], number[M + 7] = {0, 1, 2, 3, 5, 6, 7, 10, 11, 13, 14, 15, 17}, g_list[K + 1][N];
// p[x] is true when x is NOT prime (0 and 1 included); mark[s] is true when s occurs in number[1..M].
bool p[N], mark[K + 7];

// Fill every lookup table once, before any query is answered:
//   1. linear sieve          -> p[], prime[], phi[]
//   2. linear inverse table  -> inv[]
//   3. f[x] = x / phi(x), then an in-place Moebius transform so that
//      f[T] = sum over d | T of (d / phi(d)) * mu(T / d)   (mod `mod`)
//   4. mark[] from number[], then prefix sums of phi over multiples -> g_list[][]
inline void init(){
	int found = 0;

	// --- 1. linear sieve: each composite is crossed out by its smallest prime factor ---
	p[0] = true;
	p[1] = true;
	phi[1] = 1;
	for (int x = 2; x < N; ++x){
		if (!p[x]){
			++found;
			prime[found] = x;
			phi[x] = x - 1;
		}
		for (int idx = 1; idx <= found; ++idx){
			const int q = prime[idx];
			const int prod = x * q;
			if (prod >= N) break;
			p[prod] = true;
			if (x % q != 0){
				// q is a new prime factor of prod: phi is multiplicative here.
				phi[prod] = phi[x] * (q - 1);
				continue;
			}
			// q already divides x, so q is the smallest prime factor of x: stop after this one.
			phi[prod] = phi[x] * q;
			break;
		}
	}

	// --- 2. inverses: inv[x] = -(mod / x) * inv[mod % x]  (mod `mod`) ---
	inv[0] = 1;
	inv[1] = 1;
	for (int x = 2; x < N; ++x){
		const long long shifted = 1ll * (mod / x) * inv[mod % x] % mod;
		inv[x] = (int)(mod - shifted);
	}

	// --- 3. start from x / phi(x) ... ---
	for (int x = 1; x < N; ++x){
		f[x] = (int)(1ll * x * inv[phi[x]] % mod);
	}
	// ... and undo the Dirichlet prefix sum one prime at a time. Multiples are visited
	// from large to small so f[base] is still the untouched value when it is subtracted.
	for (int idx = found; idx >= 1; --idx){
		const int q = prime[idx];
		for (int base = (N - 1) / q; base >= 1; --base){
			const int target = base * q;
			f[target] -= f[base];
			if (f[target] < 0) f[target] += mod;
		}
	}

	// --- 4. precomputed rows: only the steps listed in number[] get a table ---
	for (int idx = 1; idx <= M; ++idx){
		mark[number[idx]] = true;
	}
	for (int step = 1; step <= K; ++step){
		if (!mark[step]) continue;
		int *row = g_list[step];
		for (int j = 1, arg = step; arg < N; ++j, arg += step){
			int sum = row[j - 1] + phi[arg];
			if (sum >= mod) sum -= mod;
			row[j] = sum;
		}
	}
}

// Returns (phi[k] + phi[2k] + ... + phi[floor(m / k) * k]) modulo `mod`,
// where m is the global upper bound of the current query.
// Steps up to K are answered from the precomputed g_list row; larger steps are summed directly.
inline int g(register int k){
	if (k > K){
		int sum = 0;
		int arg = k;
		while (arg <= m){
			sum += phi[arg];
			if (sum >= mod) sum -= mod;
			arg += k;
		}
		return sum;
	}
	return g_list[k][m / k];
}

// Binary exponentiation: returns x^p reduced modulo `mod` (the parameter, which shadows
// the global constant). The exponent is consumed bit by bit, lowest bit first.
// Products are formed in 64-bit arithmetic before each reduction.
inline int quick_pow(register int x, register int p, register int mod){
	long long result = 1;
	long long base = x;
	for (; p != 0; p >>= 1){
		if (p & 1) result = result * base % mod;
		base = base * base % mod;
	}
	return (int)result;
}

// Evaluates the infinite power tower n^n^n^... modulo p.
// The exponent is itself the tower taken modulo phi[p], lifted by phi[p]
// (extended Euler theorem); the recursion ends when the modulus reaches 1.
int solve(register int n, register int p){
	if (p == 1) return 0;
	const int totient = phi[p];
	const int exponent = solve(n, totient) + totient;
	return quick_pow(n, exponent, p);
}

// Reads queries "n m p" until input ends. For each query it accumulates
//   k = phi[n] * sum over divisors T of n of f[T] * g(T)   (mod `mod`)
// and prints the infinite power tower k^k^k^... modulo p.
int main(){
	int n, p;
	init();
	// Require all three fields: scanf returns 0..2 on malformed input, which
	// `!= EOF` would accept and then loop forever on stale values.
	while (scanf("%d %d %d", &n, &m, &p) == 3){
		int k = 0;
		// Enumerate divisor pairs (i, n / i) with i * i <= n; the bound is kept in
		// integer arithmetic so it does not depend on floating-point sqrt rounding.
		for (int i = 1; 1ll * i * i <= n; i++){
			if (n % i != 0) continue;
			const int j = n / i;
			k = (int)((k + 1ll * f[i] * g(i)) % mod);
			// Skip the partner when i is the exact square root, to avoid counting it twice.
			if (j != i) k = (int)((k + 1ll * f[j] * g(j)) % mod);
		}
		printf("%d\n", solve((int)(1ll * k * phi[n] % mod), p));
	}
	return 0;
}
  2. 保证能 AC 的做法

考虑优化 1. 中的做法。

经作者测试,倒推 Dirichlet 前缀和的部分耗时最长。

怎么办呢?我们考虑探究 \(f\) 函数的性质。容易发现,\(f\) 函数是一个积性函数。

那我们倒推 Dirichlet 前缀和个锤子啊?

于是可以探究 \(f\) 函数在质数幂次处的表现。下文设 \(p\) 为质数,\(k\) 为非负整数。

  1. \(k = 0\)

显然 \(f(p^k) = 1\)

  2. \(k = 1\)

此时 \(f(p^k) = \frac{p}{p - 1} - 1 = \frac{1}{p - 1}\)

  3. \(k \geq 2\)

此时 \(f(p^k) = \frac{p}{p - 1} - \frac{p}{p - 1} = 0\)

然后你就可以线性筛出 \(f\) 函数了。你甚至会发现这样做不需要怎么卡常就能 AC。

时间复杂度还是不会算(

代码:

#include <stdio.h>
#include <math.h>

// 64-bit integer used for intermediate products and for the power-tower routines.
typedef long long ll;

// Compile-time limits:
//   N   - size of the sieve tables (valid indices are 0 .. N-1),
//   M   - number of entries of number[] that init() reads (indices 1 .. M),
//   K   - largest step for which g_list[] is precomputed,
//   mod - modulus used for the sum k and for the tables inv[], f[] and g_list[].
const int N = 1e7 + 7, M = 12, K = 17, mod = 1e9 + 7;
// inv[x]        - modular inverse of x modulo `mod`;
// prime[1..cnt] - primes found by the sieve in init() (slot 0 unused);
// phi[x]        - Euler's totient of x;
// f[x]          - multiplicative function sieved in init(): inv[q - 1] at a prime q, 0 when a square divides x;
// number[1..M]  - the square-free integers not exceeding K (slot 0 is a placeholder);
// g_list[s][j]  - (phi[s] + phi[2s] + ... + phi[j*s]) modulo `mod`, filled only for s listed in number[].
int inv[N], prime[N], phi[N], f[N], number[M + 7] = {0, 1, 2, 3, 5, 6, 7, 10, 11, 13, 14, 15, 17}, g_list[K + 1][N];
// p[x] is true when x is NOT prime (0 and 1 included); mark[s] is true when s occurs in number[1..M].
bool p[N], mark[K + 7];

// Fill every lookup table once, before any query is answered:
//   1. linear inverse table                      -> inv[]
//   2. one linear sieve producing three tables   -> p[]/prime[], phi[], f[]
//      (f is multiplicative: f(q) = 1 / (q - 1) at a prime q, f(q^e) = 0 for e >= 2)
//   3. mark[] from number[], then prefix sums of phi over multiples -> g_list[][]
inline void init(){
	// --- 1. inverses: inv[x] = -(mod / x) * inv[mod % x]  (mod `mod`) ---
	inv[0] = 1;
	inv[1] = 1;
	for (int x = 2; x < N; ++x){
		const long long shifted = 1ll * (mod / x) * inv[mod % x] % mod;
		inv[x] = (int)(mod - shifted);
	}

	// --- 2. linear sieve: each composite is crossed out by its smallest prime factor ---
	int found = 0;
	p[0] = true;
	p[1] = true;
	phi[1] = 1;
	f[1] = 1;
	for (int x = 2; x < N; ++x){
		if (!p[x]){
			++found;
			prime[found] = x;
			phi[x] = x - 1;
			f[x] = inv[x - 1];
		}
		for (int idx = 1; idx <= found; ++idx){
			const int q = prime[idx];
			const int prod = x * q;
			if (prod >= N) break;
			p[prod] = true;
			if (x % q != 0){
				// q is a new prime factor of prod: both functions are multiplicative here.
				phi[prod] = phi[x] * (q - 1);
				f[prod] = (int)(1ll * f[x] * inv[q - 1] % mod);
				continue;
			}
			// q*q divides prod, so f vanishes; q is the smallest prime factor of x, so stop.
			phi[prod] = phi[x] * q;
			f[prod] = 0;
			break;
		}
	}

	// --- 3. precomputed rows: only the steps listed in number[] get a table ---
	for (int idx = 1; idx <= M; ++idx){
		mark[number[idx]] = true;
	}
	for (int step = 1; step <= K; ++step){
		if (!mark[step]) continue;
		int *row = g_list[step];
		for (int j = 1, arg = step; arg < N; ++j, arg += step){
			row[j] = (row[j - 1] + phi[arg]) % mod;
		}
	}
}

// Returns (phi[m] + phi[2m] + ... + phi[floor(n / m) * m]) modulo `mod`.
// Note the parameter roles: n is the upper bound, m is the step.
// Steps up to K are answered from the precomputed g_list row; larger steps are summed directly.
inline int g(int n, int m){
	if (m > K){
		int sum = 0;
		int arg = m;
		while (arg <= n){
			sum = (sum + phi[arg]) % mod;
			arg += m;
		}
		return sum;
	}
	return g_list[m][n / m];
}

// Binary exponentiation: returns x^p reduced modulo `mod` (the parameter, which shadows
// the global constant). The exponent is consumed bit by bit, lowest bit first.
inline ll quick_pow(ll x, ll p, ll mod){
	ll result = 1;
	for (ll bits = p; bits != 0; bits >>= 1){
		if (bits & 1) result = result * x % mod;
		x = x * x % mod;
	}
	return result;
}

// Evaluates the infinite power tower n^n^n^... modulo p.
// The exponent is itself the tower taken modulo phi[p], lifted by phi[p]
// (extended Euler theorem); the recursion ends when the modulus reaches 1.
ll solve(ll n, int p){
	if (p == 1) return 0;
	const int totient = phi[p];
	const ll exponent = solve(n, totient) + totient;
	return quick_pow(n, exponent, p);
}

// Reads queries "n m p" until input ends. For each query it accumulates
//   k = phi[n] * sum over divisors T of n of f[T] * g(m, T)   (mod `mod`)
// and prints the infinite power tower k^k^k^... modulo p.
int main(){
	int n, m, p;
	init();
	// Require all three fields: scanf returns 0..2 on malformed input, which
	// `!= EOF` would accept and then loop forever on indeterminate values.
	while (scanf("%d %d %d", &n, &m, &p) == 3){
		ll k = 0;
		// Enumerate divisor pairs (i, n / i) with i * i <= n; the bound is kept in
		// integer arithmetic so it does not depend on floating-point sqrt rounding.
		for (int i = 1; 1ll * i * i <= n; i++){
			if (n % i != 0) continue;
			const int j = n / i;
			k = (k + 1ll * f[i] * g(m, i) % mod) % mod;
			// Skip the partner when i is the exact square root, to avoid counting it twice.
			if (j != i) k = (k + 1ll * f[j] * g(m, j) % mod) % mod;
		}
		printf("%lld\n", solve(k * phi[n] % mod, p));
	}
	return 0;
}
posted @ 2021-07-30 18:39  LovelyLeasier  阅读(86)  评论(0)    收藏  举报