字符串哈希
- 字符串哈希是指把一个字符串映射成一个整数,并尽量保证不同的字符串得到不同的哈希值(冲突无法完全避免,只能让其概率足够低),这样就可以用哈希值快速判断某个字符串是否重复出现过。
- 一般取一个固定值P(P取质数),把字符串看作P进制数,并为每种字符分配一个大于0的数值。一般来说,我们分配的数值都远小于P。例如,对于小写字母构成的字符串,可以令 a=1, b=2, ..., z=26。再取一个固定值M,求出该P进制数对M的余数(取模),作为该字符串的Hash值。
- 一般P取P=131或者P=13331,冲突概率低。M通常取M=10^9+7(即1e9+7)作为模;也可以直接使用**unsigned long long**类型来存储hash值,利用无符号整数的自然溢出,相当于对2^64取模。
- 在出现碰撞的情况下,可以构造两组甚至多组hash值来对比确定是否是同一字符串
  
链接:https://leetcode-cn.com/problems/repeated-string-match/
class Solution {
public:
    /**
     * Rabin-Karp search for `needle` in `haystack`, where `haystack` is read
     * circularly: position i refers to haystack[i % n]. A match may therefore
     * start near the end of `haystack` and wrap around to its beginning.
     *
     * @param haystack text to search (treated as a ring of n characters)
     * @param needle   pattern to look for
     * @return the smallest start index in [0, n) at which `needle` matches,
     *         0 if `needle` is empty, or -1 if there is no match (including
     *         the case of an empty `haystack` with a non-empty `needle`).
     */
    int strStr(std::string haystack, std::string needle) {
        const int n = static_cast<int>(haystack.size());
        const int m = static_cast<int>(needle.size());
        if (m == 0) {
            return 0;
        }
        if (n == 0) {
            // Nothing to search in; also keeps `% n` below well-defined.
            return -1;
        }

        // Seed the C RNG once instead of on every call.
        static const bool seeded = [] {
            std::srand(static_cast<unsigned>(std::time(nullptr)));
            return true;
        }();
        (void)seeded;

        // Randomised modulus and base make it hard to craft inputs that
        // collide on purpose. Correctness does not depend on them, because
        // every hash hit is confirmed character by character below.
        const long long kModulusFloor = 1000000007LL;
        const long long kBaseFloor = 1337;
        const long long modulus = std::rand() % kModulusFloor + kModulusFloor;
        const long long base = std::rand() % kBaseFloor + kBaseFloor;

        // Hash characters as unsigned values so that bytes >= 0x80 cannot
        // drive an intermediate hash negative (which would make equal strings
        // compare unequal by hash).
        const auto code = [](char c) -> long long {
            return static_cast<unsigned char>(c);
        };

        // Exact comparison of `needle` against the circular window at `start`.
        const auto matchesAt = [&](int start) {
            for (int j = 0; j < m; ++j) {
                if (haystack[(start + j) % n] != needle[j]) {
                    return false;
                }
            }
            return true;
        };

        long long needleHash = 0;
        for (const char c : needle) {
            needleHash = (needleHash * base + code(c)) % modulus;
        }

        // Hash of the first m-1 characters, and base^(m-1) mod modulus, which
        // is the weight of the window's leading character.
        long long windowHash = 0;
        long long leadWeight = 1;
        for (int i = 0; i < m - 1; ++i) {
            windowHash = (windowHash * base + code(haystack[i % n])) % modulus;
            leadWeight = (leadWeight * base) % modulus;
        }

        for (int start = 0; start < n; ++start) {
            const int last = start + m - 1;
            windowHash = (windowHash * base + code(haystack[last % n])) % modulus;
            if (windowHash == needleHash && matchesAt(start)) {
                return start;
            }
            // Drop the leading character; add `modulus` before reducing so
            // the result stays in [0, modulus).
            const long long leading = (leadWeight * code(haystack[start])) % modulus;
            windowHash = (windowHash - leading + modulus) % modulus;
        }
        return -1;
    }

    /**
     * Minimum number of times `a` must be repeated so that `b` is a substring
     * of the repetition.
     *
     * @return the repeat count, or -1 if `b` can never appear.
     */
    int repeatedStringMatch(std::string a, std::string b) {
        const int an = static_cast<int>(a.size());
        const int bn = static_cast<int>(b.size());
        const int index = strStr(a, b);
        if (index == -1) {
            return -1;
        }
        if (an - index >= bn) {
            // The match fits entirely inside the first copy of `a`.
            return 1;
        }
        // The first copy contributes (an - index) characters; the remaining
        // bn - (an - index) characters need ceil(remaining / an) more copies.
        // (remaining - 1) / an + 1 is that ceiling, plus 1 for the first copy.
        return (bn + index - an - 1) / an + 2;
    }
};
链接:https://leetcode-cn.com/problems/longest-duplicate-substring/
class Solution {
public:
    int n = 0;                        // length of the string currently being processed
    unsigned long long prime = 31;    // base of the polynomial rolling hash

    /**
     * Returns a longest substring of `s` that occurs at least twice
     * (occurrences may overlap), or "" if no substring repeats.
     *
     * Binary-searches the answer length: if some substring of length L
     * repeats, so does one of length L-1. For each candidate length a rolling
     * polynomial hash (arithmetic wraps modulo 2^64 through unsigned overflow)
     * finds candidate repeats, and every candidate is confirmed by an exact
     * comparison.
     */
    std::string longestDupSubstring(std::string s) {
        n = static_cast<int>(s.size());
        int lo = 1;
        int hi = n - 1;
        int bestPos = -1;
        int bestLen = 0;

        // Start index of a window of length `width` that also occurs earlier
        // in `s`, or -1 if no window of that length repeats.
        auto findRepeat = [&](int width) -> int {
            unsigned long long hash = 0;
            unsigned long long power = 1;  // prime^width, weight of the outgoing char
            for (int i = 0; i < width; i++) {
                hash = hash * prime + (s[i] - 'a');
                power *= prime;
            }
            std::unordered_set<unsigned long long> seen{hash};
            for (int i = width; i < n; i++) {
                hash = hash * prime - power * (s[i - width] - 'a') + (s[i] - 'a');
                const int start = i - width + 1;
                if (!seen.insert(hash).second) {
                    // Equal hashes do not prove equal text: hashing modulo
                    // 2^64 has easily constructed collisions. The window
                    // occurs at `start`, so it is a real repeat exactly when
                    // its first occurrence in `s` lies before `start`.
                    const std::string::size_type first = s.find(s.substr(start, width));
                    if (first < static_cast<std::string::size_type>(start)) {
                        return start;
                    }
                }
            }
            return -1;
        };

        while (lo <= hi) {
            const int mid = (lo + hi) / 2;
            const int start = findRepeat(mid);
            if (start != -1) {
                bestLen = mid;
                bestPos = start;
                lo = mid + 1;   // a repeat of this length exists; try longer
            } else {
                hi = mid - 1;   // no repeat of this length; try shorter
            }
        }
        if (bestPos == -1) return "";
        return s.substr(bestPos, bestLen);
    }
};
 
                    
                
 
                
            
         
         浙公网安备 33010602011771号
浙公网安备 33010602011771号