字符串哈希
- 字符串哈希是指把一个字符串转化成一个整数,并尽量保证不同的字符串得到不同的哈希值(冲突概率极低,但无法完全避免),这样就可以用来判断某个字符串是否重复出现过。
- 一般取一固定值 P(P 取质数),把字符串看作 P 进制数,并为每种字符分配一个大于 0 的数值。一般来说,我们分配的数值都远小于 P。例如,对于小写字母构成的字符串,可以令 $a=1, b=2, \dots, z=26$。再取一固定值 M,求出该 P 进制数对 M 的余数(取模),作为该字符串的 Hash 值。
- 一般取 P=131 或者 P=13331,冲突概率低。M 通常取 $M=10^9+7$ 作为模;也可以直接使用 **unsigned long long** 类型来存储 hash 值,利用其自然溢出(相当于对 $2^{64}$ 取模)。
- 在出现碰撞的情况下,可以构造两组甚至多组hash值来对比确定是否是同一字符串

链接:https://leetcode-cn.com/problems/repeated-string-match/
/// Repeated String Match: the minimum number of copies of `a` that must be
/// concatenated so that `b` appears as a substring, or -1 if no count works.
class Solution {
public:
    /// Rabin-Karp search for `needle` inside the endless cyclic repetition of
    /// `haystack` (reads past the end wrap around to index 0).
    ///
    /// @param haystack text that is treated as repeating forever
    /// @param needle   pattern to look for; may be longer than `haystack`
    /// @return the smallest start index in [0, haystack.size()) at which
    ///         `needle` occurs in the repeated text; 0 when `needle` is empty;
    ///         -1 when there is no occurrence (including an empty `haystack`
    ///         with a non-empty `needle`).
    ///
    /// Note: reseeds the global C random generator via std::srand on each call.
    int strStr(const std::string& haystack, const std::string& needle) {
        const int n = static_cast<int>(haystack.size());
        const int m = static_cast<int>(needle.size());
        if (m == 0) {
            return 0;
        }
        if (n == 0) {
            // Nothing to cycle through; also keeps `% n` below well defined.
            return -1;
        }

        // Base and modulus are drawn at random so that inputs crafted against
        // one fixed pair cannot force collisions. mod < 2^31 and base < 2^12,
        // so every product below stays far inside the long long range.
        const long long kModFloor = 1000000007LL;
        const long long kBaseFloor = 1337;
        std::srand(static_cast<unsigned>(std::time(nullptr)));
        const long long mod = std::rand() % kModFloor + kModFloor;
        const long long base = std::rand() % kBaseFloor + kBaseFloor;

        // Hash bytes as 0..255. Using plain (possibly signed) char would let
        // the needle hash go negative while the rolling hash is normalised to
        // [0, mod), hiding genuine matches for bytes >= 0x80.
        const auto code = [](char c) -> long long {
            return static_cast<unsigned char>(c);
        };

        long long needle_hash = 0;
        for (const char c : needle) {
            needle_hash = (needle_hash * base + code(c)) % mod;
        }

        // Pre-load the first m-1 characters of the window and compute
        // base^(m-1) mod `mod`, the weight of the window's leading character.
        long long window_hash = 0;
        long long lead_weight = 1;
        for (int i = 0; i < m - 1; ++i) {
            window_hash = (window_hash * base + code(haystack[i % n])) % mod;
            lead_weight = (lead_weight * base) % mod;
        }

        // Equal hashes only suggest a match; confirm it character by character.
        const auto matches_at = [&](int start) {
            for (int j = 0; j < m; ++j) {
                if (haystack[(start + j) % n] != needle[j]) {
                    return false;
                }
            }
            return true;
        };

        // Only starts 0..n-1 need checking: later starts repeat earlier ones.
        for (int start = 0; start < n; ++start) {
            const int last = start + m - 1;
            window_hash = (window_hash * base + code(haystack[last % n])) % mod;
            if (window_hash == needle_hash && matches_at(start)) {
                return start;
            }
            // Drop the leading character, keeping the hash in [0, mod).
            const long long leading = (lead_weight * code(haystack[start])) % mod;
            window_hash = (window_hash - leading + mod) % mod;
        }
        return -1;
    }

    /// @param a string that gets repeated
    /// @param b string that must become a substring of the repetition
    /// @return the minimum repeat count of `a`, or -1 if `b` never appears.
    int repeatedStringMatch(const std::string& a, const std::string& b) {
        const int an = static_cast<int>(a.size());
        const int bn = static_cast<int>(b.size());
        const int index = strStr(a, b);
        if (index == -1) {
            return -1;
        }
        if (an - index >= bn) {
            // The match fits entirely inside the first copy of `a`.
            return 1;
        }
        // The first copy supplies an - index characters; the remaining
        // bn - (an - index) characters need ceil(remaining / an) more copies.
        return (bn + index - an - 1) / an + 2;
    }
};
链接:https://leetcode-cn.com/problems/longest-duplicate-substring/
/// Longest Duplicate Substring: binary search on the answer length combined
/// with a rolling polynomial hash over every window of that length.
class Solution {
public:
    int n;                          ///< Length of the string from the latest call.
    unsigned long long prime = 31;  ///< Base of the polynomial hash.

    /// @param s input string
    /// @return a longest substring that occurs at least twice in `s`
    ///         (occurrences may overlap), or "" when there is none.
    std::string longestDupSubstring(const std::string& s) {
        n = static_cast<int>(s.size());
        int best_pos = -1;
        int best_len = 0;

        // Returns the start of a window of length `width` whose content also
        // appears earlier in `s`, or -1 if no such window exists.
        const auto find = [&](int width) -> int {
            // Hashes live in unsigned long long, so arithmetic wraps mod 2^64.
            unsigned long long rolling = 0;
            unsigned long long power = 1;  // becomes prime^width
            for (int i = 0; i < width; ++i) {
                rolling = rolling * prime + (s[i] - 'a');
                power *= prime;
            }
            std::unordered_set<unsigned long long> seen{rolling};
            for (int i = width; i < n; ++i) {
                // Slide right by one: append s[i], remove s[i - width].
                rolling = rolling * prime - power * (s[i - width] - 'a') + (s[i] - 'a');
                const int start = i - width + 1;
                if (seen.count(rolling)) {
                    // Equal hashes can be a collision (mod 2^64 with a small
                    // base has known colliding inputs), so confirm that the
                    // same text really occurs before `start`. The search always
                    // stops at `start` at the latest, where the window matches
                    // itself.
                    const std::string::size_type first = s.find(
                        s.data() + start, 0,
                        static_cast<std::string::size_type>(width));
                    if (first < static_cast<std::string::size_type>(start)) {
                        return start;
                    }
                } else {
                    seen.insert(rolling);
                }
            }
            return -1;
        };

        // If a duplicate of length L exists, one of every shorter length does
        // too, so the feasible lengths form a prefix of [1, n-1].
        int lo = 1;
        int hi = n - 1;
        while (lo <= hi) {
            const int mid = (lo + hi) / 2;
            const int start = find(mid);
            if (start != -1) {
                best_len = mid;
                best_pos = start;
                lo = mid + 1;
            } else {
                hi = mid - 1;
            }
        }
        if (best_pos == -1) {
            return "";
        }
        return s.substr(best_pos, best_len);
    }
};

浙公网安备 33010602011771号