题解 HDU5728 【PowMod】
- 不保证能 AC 的做法
前置芝士:欧拉函数、莫比乌斯反演、线性求逆元、Dirichlet 前 & 后缀和、扩展欧拉定理(exEuler)、卡常
关于欧拉函数,有结论:\(\varphi(ij) = \frac{\varphi(i) \varphi(j) \gcd(i, j)}{\varphi(\gcd(i, j))}\)。证明略。
由题意及上述结论(\(k\) 最终对 \(10^9 + 7\) 取模):\(k = \displaystyle\sum_{i = 1}^m \varphi(in) = \varphi(n) \displaystyle\sum_{i = 1}^m \frac{\varphi(i) \gcd(i, n)}{\varphi(\gcd(i, n))}\)
\(= \varphi(n) \displaystyle\sum_{d\ |\ n} \frac{d}{\varphi(d)} \sum_{i = 1}^m [\gcd(i, n) = d] \varphi(i)\)
\(= \varphi(n) \displaystyle\sum_{d\ |\ n} \frac{d}{\varphi(d)} \sum_{i = 1}^{\lfloor \frac{m}{d} \rfloor} [\gcd(i, \frac{n}{d}) = 1] \varphi(id)\)
\(= \varphi(n) \displaystyle\sum_{d\ |\ n} \frac{d}{\varphi(d)} \sum_{q\ |\ \frac{n}{d}} \mu(q) \sum_{i = 1}^{\lfloor \frac{m}{dq} \rfloor} \varphi(idq)\)
令 \(T = dq\),有:
\(k = \varphi(n) \displaystyle\sum_{T\ |\ n} (\sum_{d\ |\ T} \frac{d}{\varphi(d)} \mu(\frac{T}{d})) \sum_{i = 1}^{\lfloor \frac{m}{T} \rfloor} \varphi(iT)\)
显然,筛出欧拉函数后,\(f(T) = \displaystyle\sum_{d\ |\ T} \frac{d}{\varphi(d)} \mu(\frac{T}{d})\) 可以通过倒推 Dirichlet 前缀和在 \(O(N \ln \ln N)\) 的时间复杂度内预处理出。
但是……剩下的部分怎么办呢?如果不预处理,每次在线计算还是会 TLE。
那就预处理嘛。设 \(g(n, m) = \displaystyle\sum_{i = 1}^m \varphi(in)\),显然有转移方程 \(g(n, m) = g(n, m - 1) + \varphi(nm)\)。
但预处理所有 \(g(n, m)\) 显然是不现实的,所以要选定阈值 \(K\),对于 \(K\) 以内所有数 \(n\) 预处理出 \(g(n, m)\) 的值。
注意到题目中 \(n\) 为无平方因数的数,所以只需要预处理 \(K\) 以内所有的无平方因数的数 \(n\) 即可。我的代码里取 \(K = 17\)(差不多卡在空间限制左右)。
最后剩下的无限幂塔部分显然可以用扩展欧拉定理快速计算。
但这样还是会 TLE。怎么办呢?卡常。register、inline、取模优化、火车头、指令集都加上,然后你就可以愉快地 AC 了。当然,我也不能保证这份代码时时刻刻都能 AC,因为在第一次 AC 后再怎么提交也无法 AC 了。建议选择吃饭或夜深人静的时候提交。
时间复杂度不会算(
丑陋的代码:
#pragma GCC optimize("Ofast")
#pragma GCC optimize("inline")
#pragma GCC optimize("-fgcse")
#pragma GCC optimize("-fgcse-lm")
#pragma GCC optimize("-fipa-sra")
#pragma GCC optimize("-ftree-pre")
#pragma GCC optimize("-ftree-vrp")
#pragma GCC optimize("-fpeephole2")
#pragma GCC optimize("-ffast-math")
#pragma GCC optimize("-fsched-spec")
#pragma GCC optimize("unroll-loops")
#pragma GCC optimize("-falign-jumps")
#pragma GCC optimize("-falign-loops")
#pragma GCC optimize("-falign-labels")
#pragma GCC optimize("-fdevirtualize")
#pragma GCC optimize("-fcaller-saves")
#pragma GCC optimize("-fcrossjumping")
#pragma GCC optimize("-fthread-jumps")
#pragma GCC optimize("-funroll-loops")
#pragma GCC optimize("-freorder-blocks")
#pragma GCC optimize("-fschedule-insns")
#pragma GCC optimize("inline-functions")
#pragma GCC optimize("-ftree-tail-merge")
#pragma GCC optimize("-fschedule-insns2")
#pragma GCC optimize("-fstrict-aliasing")
#pragma GCC optimize("-falign-functions")
#pragma GCC optimize("-fcse-follow-jumps")
#pragma GCC optimize("-fsched-interblock")
#pragma GCC optimize("-fpartial-inlining")
#pragma GCC optimize("no-stack-protector")
#pragma GCC optimize("-freorder-functions")
#pragma GCC optimize("-findirect-inlining")
#pragma GCC optimize("-fhoist-adjacent-loads")
#pragma GCC optimize("-frerun-cse-after-loop")
#pragma GCC optimize("inline-small-functions")
#pragma GCC optimize("-finline-small-functions")
#pragma GCC optimize("-ftree-switch-conversion")
#pragma GCC optimize("-foptimize-sibling-calls")
#pragma GCC optimize("-fexpensive-optimizations")
#pragma GCC optimize("inline-functions-called-once")
#pragma GCC optimize("-fdelete-null-pointer-checks")
#pragma GCC target("abm")
#pragma GCC target("avx")
#pragma GCC target("f16c")
#pragma GCC target("mmx")
#pragma GCC target("popcnt")
#pragma GCC target("sse")
#pragma GCC target("sse2")
#pragma GCC target("sse3")
#pragma GCC target("sse4")
#pragma GCC target("sse4.1")
#pragma GCC target("sse4.2")
#pragma GCC target("ssse3")
#pragma GCC target("tune=native")
#include <stdio.h>
#include <math.h>
// Sizes and moduli shared by the whole program:
//   N   - length of every sieve table (valid indices are 0 .. N - 1),
//   M   - number of used entries in number[] (indices 1 .. M),
//   K   - largest multiplier that owns a precomputed row in g_list,
//   mod - modulus applied to the sum k.
const int N = 1e7 + 1, M = 12, K = 17, mod = 1e9 + 7;
// Upper bound m of the current query; main() reads it with scanf and g() uses it.
int m;
// prime[1..cnt] - primes produced by the sieve in init();
// phi[]         - Euler's totient;
// inv[]         - modular inverses modulo mod;
// f[]           - weight attached to each divisor when main() accumulates k;
// number[1..M]  - the square-free integers not exceeding K (index 0 is a placeholder);
// g_list[i][j]  - (phi[i] + phi[2i] + ... + phi[j * i]) modulo mod, filled only for i listed in number[].
int prime[N], phi[N], inv[N], f[N], number[M + 7] = {0, 1, 2, 3, 5, 6, 7, 10, 11, 13, 14, 15, 17}, g_list[K + 1][N];
// p[i] is true when i is NOT prime (0 and 1 included); mark[i] is true when i occurs in number[].
bool p[N], mark[K + 7];
// One-time precomputation of every lookup table used while answering queries:
// p[], prime[], phi[], inv[], f[], mark[] and the marked rows of g_list[].
inline void init(){
    int prime_total = 0;

    // Linear sieve for primality and Euler's totient. The inner loop stops at
    // the first prime dividing cur, so each composite is written exactly once.
    phi[1] = 1;
    p[0] = true;
    p[1] = true;
    for (int cur = 2; cur < N; ++cur){
        if (!p[cur]){
            prime_total += 1;
            prime[prime_total] = cur;
            phi[cur] = cur - 1;
        }
        for (int idx = 1; idx <= prime_total; ++idx){
            const int q = prime[idx];
            const int product = cur * q;
            if (product >= N){
                break;
            }
            p[product] = true;
            if (cur % q != 0){
                // q is a new prime factor of the product.
                phi[product] = phi[cur] * (q - 1);
            } else {
                // q already divides cur: the totient just scales by q.
                phi[product] = phi[cur] * q;
                break;
            }
        }
    }

    // Modular inverses via inv[x] = -(mod / x) * inv[mod % x]  (mod mod).
    inv[0] = 1;
    inv[1] = 1;
    for (int x = 2; x < N; ++x){
        const long long quotient = mod / x;
        inv[x] = mod - quotient * inv[mod % x] % mod;
    }

    // Start from f[x] = x / phi(x) modulo mod ...
    for (int x = 1; x < N; ++x){
        f[x] = (long long)x * inv[phi[x]] % mod;
    }
    // ... then undo a Dirichlet prefix sum: for every prime, walk the
    // multipliers downwards and subtract f[j] from f[j * prime].
    for (int idx = prime_total; idx >= 1; --idx){
        const int q = prime[idx];
        for (int mult = (N - 1) / q; mult >= 1; --mult){
            const int target = mult * q;
            int diff = f[target] - f[mult];
            if (diff < 0){
                diff += mod;
            }
            f[target] = diff;
        }
    }

    // Flag the multipliers that get a precomputed row.
    for (int idx = 1; idx <= M; ++idx){
        mark[number[idx]] = true;
    }
    // row[j] accumulates phi over the first j multiples of d, kept below mod.
    for (int d = 1; d <= K; ++d){
        if (!mark[d]){
            continue;
        }
        int *row = g_list[d];
        int multiple = d;
        for (int count = 1; multiple < N; ++count){
            int sum = row[count - 1] + phi[multiple];
            if (sum >= mod){
                sum -= mod;
            }
            row[count] = sum;
            multiple += d;
        }
    }
}
// Returns (phi[k] + phi[2k] + ...) over all multiples of k not exceeding the
// global m, modulo mod. Multipliers up to K are answered from g_list.
inline int g(register int k){
    if (k > K){
        // No precomputed row for this multiplier: add the terms up directly.
        int total = 0;
        int multiple = k;
        while (multiple <= m){
            total += phi[multiple];
            if (total >= mod){
                total -= mod;
            }
            multiple += k;
        }
        return total;
    }
    return g_list[k][m / k];
}
// Modular exponentiation by repeated squaring.
//
// x   - base; any int, it is reduced into [0, mod) before use.
// p   - exponent; values <= 0 are treated as 0 so the loop always terminates.
// mod - modulus, must be >= 1 (mod == 0 would divide by zero).
//
// Returns x^p modulo mod as a value in [0, mod). In particular the result is
// 0 whenever mod == 1, including for exponent 0.
static inline int quick_pow(int x, int p, int mod){
    long long base = x % mod;
    if (base < 0){
        base += mod;
    }
    // 1 % mod rather than 1, so that the empty product is still reduced.
    long long result = 1 % mod;
    while (p > 0){
        if (p & 1){
            result = result * base % mod;
        }
        base = base * base % mod;
        p >>= 1;
    }
    return (int)result;
}
// Evaluates the infinite power tower n^n^n^... modulo p.
// The exponent of each level is obtained recursively modulo phi[p] and then
// raised by phi[p] before calling quick_pow; the recursion ends at modulus 1.
int solve(register int n, register int p){
    if (p == 1){
        return 0;
    }
    const int totient = phi[p];
    const int exponent = solve(n, totient) + totient;
    return quick_pow(n, exponent, p);
}
int main(){
int n, p;
init();
while (scanf("%d %d %d", &n, &m, &p) != EOF){
register int k = 0, t = sqrt(n);
for (register int i = 1; i <= t; i++){
if (n % i == 0){
if ((k += 1ll * f[i] * g(i) % mod) >= mod) k -= mod;
if (i * i != n && (k += 1ll * f[n / i] * g(n / i) % mod) >= mod) k -= mod;
}
}
printf("%d\n", solve(1ll * k * phi[n] % mod, p));
}
return 0;
}
- 保证能 AC 的做法
考虑优化上一种(不保证能 AC 的)做法。
经作者测试,倒推 Dirichlet 前缀和的部分耗时最长。
怎么办呢?我们考虑探究 \(f\) 函数的性质。注意到 \(f\) 是两个积性函数 \(\frac{n}{\varphi(n)}\) 与 \(\mu(n)\) 的 Dirichlet 卷积,因此 \(f\) 函数也是一个积性函数。
那我们倒推 Dirichlet 前缀和个锤子啊?
于是可以探究 \(f\) 函数在质数幂次处的表现。下文设 \(p\) 为质数,\(k\) 为非负整数。
- 当 \(k = 0\)
显然 \(f(p^k) = 1\)。
- 当 \(k = 1\)
此时 \(f(p^k) = \frac{p}{p - 1} - 1 = \frac{1}{p - 1}\)。
- 当 \(k \geq 2\)
此时 \(f(p^k) = \frac{p}{p - 1} - \frac{p}{p - 1} = 0\)。
然后你就可以线性筛出 \(f\) 函数了。你甚至会发现这样做不需要怎么卡常就能 AC。
时间复杂度还是不会算(
代码:
#include <stdio.h>
#include <math.h>
// Shorthand for the 64-bit integer type used for modular products.
typedef long long ll;
// Sizes and moduli shared by the whole program:
//   N   - length of every sieve table (valid indices are 0 .. N - 1),
//   M   - number of used entries in number[] (indices 1 .. M),
//   K   - largest multiplier that owns a precomputed row in g_list,
//   mod - modulus applied to the sum k.
const int N = 1e7 + 7, M = 12, K = 17, mod = 1e9 + 7;
// inv[]         - modular inverses modulo mod;
// prime[1..cnt] - primes produced by the sieve in init();
// phi[]         - Euler's totient;
// f[]           - weight attached to each divisor when main() accumulates k;
// number[1..M]  - the square-free integers not exceeding K (index 0 is a placeholder);
// g_list[i][j]  - (phi[i] + phi[2i] + ... + phi[j * i]) modulo mod, filled only for i listed in number[].
int inv[N], prime[N], phi[N], f[N], number[M + 7] = {0, 1, 2, 3, 5, 6, 7, 10, 11, 13, 14, 15, 17}, g_list[K + 1][N];
// p[i] is true when i is NOT prime (0 and 1 included); mark[i] is true when i occurs in number[].
bool p[N], mark[K + 7];
// One-time precomputation of every lookup table used while answering queries:
// inv[], p[], prime[], phi[], f[], mark[] and the marked rows of g_list[].
inline void init(){
    int prime_total = 0;

    // Modular inverses via inv[x] = -(mod / x) * inv[mod % x]  (mod mod).
    inv[0] = 1;
    inv[1] = 1;
    for (int x = 2; x < N; ++x){
        const long long quotient = mod / x;
        inv[x] = mod - quotient * inv[mod % x] % mod;
    }

    // Linear sieve producing phi[] and the multiplicative function f[] together:
    // f is 1 at 1, 1 / (q - 1) at a prime q, and 0 once a prime factor repeats.
    phi[1] = 1;
    f[1] = 1;
    p[0] = true;
    p[1] = true;
    for (int cur = 2; cur < N; ++cur){
        if (!p[cur]){
            prime_total += 1;
            prime[prime_total] = cur;
            phi[cur] = cur - 1;
            f[cur] = inv[cur - 1];
        }
        for (int idx = 1; idx <= prime_total; ++idx){
            const int q = prime[idx];
            const int product = cur * q;
            if (product >= N){
                break;
            }
            p[product] = true;
            if (cur % q != 0){
                // q is a new prime factor: both functions multiply through.
                phi[product] = phi[cur] * (q - 1);
                f[product] = (long long)f[cur] * inv[q - 1] % mod;
            } else {
                // q already divides cur, so the product has a squared factor.
                phi[product] = phi[cur] * q;
                f[product] = 0;
                break;
            }
        }
    }

    // Flag the multipliers that get a precomputed row.
    for (int idx = 1; idx <= M; ++idx){
        mark[number[idx]] = true;
    }
    // row[j] accumulates phi over the first j multiples of d, modulo mod.
    for (int d = 1; d <= K; ++d){
        if (!mark[d]){
            continue;
        }
        int *row = g_list[d];
        int multiple = d;
        for (int count = 1; multiple < N; ++count){
            row[count] = (row[count - 1] + phi[multiple]) % mod;
            multiple += d;
        }
    }
}
// Returns (phi[m] + phi[2m] + ...) over all multiples of m not exceeding n,
// modulo mod. Multipliers up to K are answered from g_list.
inline int g(int n, int m){
    if (m > K){
        // No precomputed row for this multiplier: add the terms up directly.
        int total = 0;
        int multiple = m;
        while (multiple <= n){
            total = (total + phi[multiple]) % mod;
            multiple += m;
        }
        return total;
    }
    return g_list[m][n / m];
}
// Modular exponentiation by repeated squaring.
//
// x   - base; any value, it is reduced into [0, mod) before the first multiplication
//       so that squaring cannot overflow for large inputs.
// p   - exponent; values <= 0 are treated as 0 so the loop always terminates.
// mod - modulus, must be >= 1, and small enough that (mod - 1)^2 fits in a
//       long long (any modulus below 2^31 is safe).
//
// Returns x^p modulo mod as a value in [0, mod). In particular the result is
// 0 whenever mod == 1, including for exponent 0.
static inline long long quick_pow(long long x, long long p, long long mod){
    long long base = x % mod;
    if (base < 0){
        base += mod;
    }
    // 1 % mod rather than 1, so that the empty product is still reduced.
    long long result = 1 % mod;
    while (p > 0){
        if (p & 1){
            result = result * base % mod;
        }
        base = base * base % mod;
        p >>= 1;
    }
    return result;
}
// Evaluates the infinite power tower n^n^n^... modulo p.
// The exponent of each level is obtained recursively modulo phi[p] and then
// raised by phi[p] before calling quick_pow; the recursion ends at modulus 1.
ll solve(ll n, int p){
    if (p == 1){
        return 0;
    }
    const int totient = phi[p];
    const ll exponent = solve(n, totient) + totient;
    return quick_pow(n, exponent, p);
}
int main(){
int n, m, p;
init();
while (scanf("%d %d %d", &n, &m, &p) != EOF){
int t = sqrt(n);
ll k = 0;
for (register int i = 1; i <= t; i++){
if (n % i == 0){
k = (k + 1ll * f[i] * g(m, i) % mod) % mod;
if (i * i != n) k = (k + 1ll * f[n / i] * g(m, n / i) % mod) % mod;
}
}
printf("%lld\n", solve(k * phi[n] % mod, p));
}
return 0;
}

浙公网安备 33010602011771号