字符串哈希
算法原理: 将一个字符串看成是一个P 进制的数字。
代码模板:
# python
class StringHash:
    """Polynomial rolling hash of a string, addressed with 1-based positions.

    The string is read as a number written in base ``BASE`` and reduced
    modulo ``MOD``.  After construction:

    * ``h[i]`` is the hash of the first ``i`` characters (``h[0] == 0``).
    * ``p[i]`` is ``BASE ** i % MOD``.
    """

    def __init__(self, s):
        """Precompute prefix hashes and powers of the base for ``s``."""
        n = len(s)
        self.BASE = BASE = 131  # radix; 131 and 131313 are common choices
        # Modulus; other common choices are 10**9 + 7 and 998244353.
        self.MOD = MOD = 10 ** 13 + 7
        self.h = h = [0] * (n + 1)
        self.p = p = [1] * (n + 1)
        for i in range(1, n + 1):
            p[i] = (p[i - 1] * BASE) % MOD
            h[i] = (h[i - 1] * BASE % MOD + ord(s[i - 1])) % MOD

    def get_hash(self, l, r):
        """Return the hash of the characters at 1-based positions ``l..r``.

        Both ends are inclusive, i.e. the result describes ``s[l - 1:r]``.
        The prefix ``h[l - 1]`` is shifted left by the substring length and
        subtracted from ``h[r]``; Python's ``%`` keeps the result in
        ``[0, MOD)`` even when the difference is negative.
        """
        return (self.h[r] - self.h[l - 1] * self.p[r - l + 1] % self.MOD) % self.MOD


# Usage:
#     sh = StringHash(s)
#     h = sh.get_hash(l, r)
// 注意下标映射:哈希数组 h 的下标从 1 开始,而题目给出的字符串下标通常从 0 开始;把字符串下标换算成 h 的下标时,统一加 1 即可。
// c++
// P可以取1331和131
// 取模的话MOD 可以有1e9 + 7 和1e9 + 9
// 双哈希:从上面的 P 和 MOD 中任选两组不同的 (P, MOD) 组合,分别计算哈希值,两个哈希值都相等才判定字符串相等。
// Hash of the substring at 1-based positions l..r (inclusive), computed from
// the prefix-hash array h and the power table p: the prefix ending at l-1 is
// shifted by the substring length and subtracted from the prefix ending at r.
// No modulus is applied here (presumably ull is an unsigned 64-bit type, so
// overflow simply wraps). Variant with an explicit modulus:
//   ((h[r]-h[l-1]*p[r-l+1]) + MOD) % MOD
ull get(int l, int r) {
    const int len = r - l + 1;
    const auto shifted_prefix = h[l - 1] * p[len];
    return h[r] - shifted_prefix;
}
p[0] = 1; for(int i = 1;i<=n;i++){ h[i] = h[i-1]*P+str[i]; p[i] = p[i-1]*P; }

from collections import Counter
from typing import List


class StringHash:
    """Polynomial rolling hash of a string, addressed with 0-based positions.

    After construction ``h[i]`` is the hash of ``s[:i]`` and ``p[i]`` is
    ``BASE ** i % MOD``.  Each character contributes ``2 * ord(ch)``; since
    ``MOD`` is odd this only scales the plain polynomial hash by an
    invertible constant, so it is kept to leave hash values unchanged.
    """

    def __init__(self, s: str):
        """Precompute prefix hashes and powers of the base for ``s``."""
        n = len(s)
        self.MOD = MOD = 10**13 + 7
        self.BASE = BASE = 131
        self.h = h = [0] * (n + 1)
        self.p = p = [1] * (n + 1)
        for i, ch in enumerate(s, start=1):
            p[i] = p[i - 1] * BASE % MOD
            h[i] = (h[i - 1] * BASE + ord(ch) * 2) % MOD

    def get_hash(self, l, r):
        """Return the hash of ``s[l:r + 1]`` (0-based, both ends inclusive).

        Python's ``%`` keeps the result in ``[0, MOD)`` even when the
        intermediate difference is negative.
        """
        return (self.h[r + 1] - self.h[l] * self.p[r - l + 1]) % self.MOD


class Solution:
    def findRepeatedDnaSequences(self, s: str) -> List[str]:
        """Return every 10-character substring of ``s`` occurring more than once.

        Each substring is reported once, in the order in which its second
        occurrence is met.  Windows are compared by hash only, so two
        different windows with the same hash would be treated as equal.
        """
        window = 10
        hs = StringHash(s)
        seen = Counter()  # hash of a window -> number of times it was met
        ans = []
        for start in range(len(s) - window + 1):
            key = hs.get_hash(start, start + window - 1)
            seen[key] += 1
            # Report on the second sighting only, so each repeat is listed once.
            if seen[key] == 2:
                ans.append(s[start:start + window])
        return ans


#include <algorithm>
#include <cstdio>
#include <cstring>
#include <iostream>
#include <string>
#include <vector>

using namespace std;

typedef unsigned long long ULL;

const int P = 131;  // hash radix

int n, m;
string str;        // input text; after reading, its characters live at str[1..n]
vector<ULL> h, p;  // h[i]: hash of str[1..i]; p[i]: P^i (unsigned overflow wraps, i.e. mod 2^64)

// Returns the hash of str[l..r] (1-based, both ends inclusive).
// Callers are expected to pass 1 <= l <= r <= n.
ULL get(int l, int r) {
    return h[r] - h[l - 1] * p[r - l + 1];
}

// Reads n, m and a string, then answers m queries "l1 r1 l2 r2" with
// "Yes" if str[l1..r1] and str[l2..r2] have equal hashes, "No" otherwise.
int main() {
    ios::sync_with_stdio(false);
    cin.tie(nullptr);

    // Read into std::string: extracting into a raw char* (`cin >> buf + 1`)
    // is unbounded and no longer compiles under C++20.
    cin >> n >> m >> str;
    n = static_cast<int>(str.size());  // trust the real length of the text
    str.insert(str.begin(), ' ');      // placeholder so that indices are 1-based

    h.assign(n + 1, 0);
    p.assign(n + 1, 1);
    for (int i = 1; i <= n; i++) {
        h[i] = h[i - 1] * P + str[i];
        p[i] = p[i - 1] * P;
    }

    while (m--) {
        int l1, r1, l2, r2;
        // Stop on truncated input instead of querying with garbage indices.
        if (!(cin >> l1 >> r1 >> l2 >> r2)) break;
        // '\n' rather than endl: no flush per query.
        cout << (get(l1, r1) == get(l2, r2) ? "Yes" : "No") << '\n';
    }

    return 0;
}

#include <algorithm>
#include <cstdio>
#include <cstring>
#include <iostream>
#include <unordered_set>
#include <vector>

using namespace std;

typedef unsigned long long ULL;

const int N = 2e5 + 10, P = 131;

char str1[N], str2[N];  // both strings are stored from index 1
ULL h[N], p[N];         // prefix hashes of str2 and powers of P (unsigned overflow wraps)

// Returns the hash of str2[l..r] (1-based, both ends inclusive).
ULL get(int l, int r) {
    return h[r] - h[l - 1] * p[r - l + 1];
}

// Counts the distinct (by hash) substrings of str2 that have the same length
// and the same letter counts as str1, and prints that number.
// NOTE(review): indexing with `ch - 'a'` assumes lowercase a-z input — confirm
// against the problem statement.
int main() {
    // Field width 200008 = N - 2: the buffers start at index 1 and need room
    // for the terminating NUL. Keep it in sync with N.
    scanf("%200008s%200008s", str1 + 1, str2 + 1);
    int n = strlen(str1 + 1), m = strlen(str2 + 1);

    p[0] = 1;
    for (int i = 1; i <= m; i++) {
        h[i] = h[i - 1] * P + str2[i];
        p[i] = p[i - 1] * P;
    }

    vector<int> cnt1(26, 0);
    vector<int> cnt2(26, 0);
    // Count only the n real characters. Iterating over the whole array would
    // also visit str1[0] and the trailing NULs and index cnt1 out of bounds.
    for (int i = 1; i <= n; i++) cnt1[str1[i] - 'a']++;

    unordered_set<ULL> seen;
    // Sliding window [j, i] of length n over str2; cnt2 holds its letter counts.
    for (int i = 1, j = 1; i <= m; i++) {
        cnt2[str2[i] - 'a']++;
        if (i < n) continue;  // window not full yet
        if (cnt1 == cnt2) seen.insert(get(j, i));
        cnt2[str2[j++] - 'a']--;
    }

    cout << seen.size() << endl;

    return 0;
}


#include <algorithm>
#include <cstdio>
#include <cstring>
#include <iostream>
#include <string>
#include <unordered_set>
#include <vector>

using namespace std;

typedef unsigned long long ULL;

const int P = 131;            // hash radix
const int MAX_WORD_LEN = 10;  // longest dictionary word the DP looks back for

// Reads dictionary words up to a lone ".", then concatenates the remaining
// whitespace-separated tokens into one text and prints the length of its
// longest prefix that can be split into dictionary words.
int main() {
    // Hashes of the dictionary words, each computed from its last character
    // to its first so that the DP below can extend a hash while walking left.
    unordered_set<ULL> dict;
    string str;
    // `&&` instead of the comma operator: stop at EOF as well as at ".".
    while (cin >> str && str != ".") {
        ULL h = 0;
        for (int i = static_cast<int>(str.size()) - 1; i >= 0; i--) {
            h = h * P + str[i];
        }
        dict.insert(h);
    }

    str.clear();
    string line;
    while (cin >> line) str += line;

    const int len = static_cast<int>(str.size());
    // f[i] != 0: the prefix of length i can be composed of dictionary words.
    // Sized from the input so that long texts cannot overrun a fixed array.
    vector<char> f(len + 1, 0);
    f[0] = 1;

    int res = 0;
    for (int i = 1; i <= len; i++) {
        ULL h = 0;
        // Try every word ending at position i; `j >= 1` keeps str[j - 1] and
        // f[j - 1] inside their bounds when i < MAX_WORD_LEN.
        for (int j = i; j >= 1 && j > i - MAX_WORD_LEN; j--) {
            h = h * P + str[j - 1];
            if (f[j - 1] && dict.count(h)) {
                f[i] = 1;
                res = i;
                break;
            }
        }
    }

    cout << res << endl;

    return 0;
}

浙公网安备 33010602011771号