UOJ530【美团杯2020】汉明距离【随机化】

下发长为 \(d\)\(01\)\(\pi\),输入长为 \(d\)\(01\)\(\sigma\),求 \(d(\pi,\sigma)=\sum_{i=1}^d|\pi_i-\sigma_i|\)

\(d=2^{19}\),代码长度限制 \(2\text{KB}\),2-approximate。


JL Lemma

给定 \(\epsilon\in(0,1)\)\(m\) 个点 \(x_1,\cdots,x_m\in\R^N\)\(\forall n>\log m/(\epsilon^2/2-\epsilon^3/3)\),存在线性映射 \(f:\R^N\rightarrow\R^n\),使得

\[\left|\frac{|f(x_i)-f(x_j)|^2}{|x_i-x_j|^2}-1\right|\le\epsilon \]

这实际上是一个超级著名的结论,因为它在将数据量压缩到对数阶的同时保相当程度的线性性。然而不太会证

JL Lemma 的特例

\(\mathbf v\)\(d\) 维向量,\(A\)\(\{-1,1\}^{n\times d}\) 中均匀随机,则 \(\mathbb E[|A\mathbf v|^2]=n|\mathbf v|^2,\text{Var}[|A\mathbf v|^2]=n|v|^4\)

于是我们固定随机数种子,随机一个 \(A\)。算出 \(|A(\pi-\sigma)|^2\) 然后 \(/n\) 四舍五入即可。

先把 \(A\cdot\pi\) 算出来打个表,然后计算时把 \(A\cdot\sigma\) 算出来即可。

\(n=2^8\),时间复杂度 \(O(\frac{nd}w)\),代码长度复杂度 \(O(n)\)

#include<bits/stdc++.h>
using namespace std;
typedef long long LL;
const int N = 1<<19, B = 256, biao[B] = {-171,345,-491,-15,-463,-335,487,-429,-257,-1161,-289,-545,-341,-273,-323,455,-621,25,921,-865,57,-7,131,705,-133,-513,715,-411,497,-745,765,-1105,305,247,571,-297,47,-661,-951,-371,197,447,-859,-31,41,701,371,243,435,677,571,-509,-885,891,-337,-879,-811,337,-1075,309,-433,873,-285,-441,-979,-373,969,-279,155,375,-95,225,-187,-187,-227,-427,229,-585,869,489,341,-311,5,27,187,1,-629,281,593,207,-175,645,693,-321,-485,-529,-163,-493,623,859,165,-287,439,113,-653,591,-73,417,-37,-719,307,491,537,141,619,-155,-389,-381,673,-613,-187,-637,-45,367,-289,-827,605,-127,-1059,93,629,-279,-89,353,-13,-1231,341,609,-219,-125,-813,205,-655,607,-217,-91,-431,227,-75,-275,407,69,429,107,-49,-725,-357,29,-531,681,-143,-41,-407,835,-533,471,-85,419,51,-51,-1095,-53,717,-175,-681,-663,-73,-641,1281,189,99,519,-365,-505,269,335,-1101,-727,193,-331,289,-149,409,43,65,-3,1319,121,-881,-271,-431,-123,1167,249,-801,-109,501,-41,-469,81,-27,-115,959,-373,649,-701,-267,-583,-171,-1123,-75,-87,-525,119,-709,111,869,165,-381,-829,561,15,743,-11,-123,233,-83,145,-271,67,841,133,-229,-421,-809,123,149,-877,-21,119,-821,-531,-449,263,125,675};
char str[N+5]; unsigned S[N>>5]; int sum; LL ans;
int main(){
	mt19937 rng(20210325); scanf("%s", str);
	for(int i = 16383;~i;-- i){
		S[i] = strtol(str+(i<<5), NULL, 2) ^ rng(); str[i<<5] = 0;
		sum += __builtin_popcount(S[i]);
	} for(int _ = 0;_ < B;++ _){ int res = 0;
		for(int i = 0;i < 16384;++ i) res += __builtin_popcount(rng() & S[i]);
		res = res * 2 - sum - biao[_]; ans += (LL)res * res;
	} printf("%lld\n", (ans + 128) / B);
}
posted @ 2021-03-25 17:39  mizu164  阅读(122)  评论(0)    收藏  举报