字符串哈希

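算法原理: 将一个字符串看成是一个 P 进制的数字。前缀哈希满足 h[i] = h[i-1] * P + s[i]，于是子串 [l, r] 的哈希值为 h[r] - h[l-1] * P^(r-l+1)（对 MOD 取模，或使用 unsigned long long 自然溢出）。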

代码模板:

# python  
class StringHash:
    """Prefix polynomial hash of a string with O(1) substring-hash queries.

    The string is read as a number in base ``BASE`` reduced modulo ``MOD``.
    ``h[i]`` holds the hash of the first ``i`` characters and ``p[i]`` holds
    ``BASE ** i % MOD``.  Query positions are 1-based.
    """

    def __init__(self, s):
        """Precompute prefix hashes and powers of the base for ``s``."""
        n = len(s)
        self.BASE = BASE = 131  # radix; 131313 is another common choice
        self.MOD = MOD = 10 ** 13 + 7  # other common moduli: 10**9+7, 998244353
        self.h = h = [0] * (n + 1)
        self.p = p = [1] * (n + 1)
        for i in range(1, n + 1):
            p[i] = (p[i - 1] * BASE) % MOD
            # s is 0-based while h is 1-based, hence s[i - 1].
            h[i] = (h[i - 1] * BASE % MOD + ord(s[i - 1])) % MOD

    def get_hash(self, l, r):
        """Return the hash of the 1-based closed range ``[l, r]`` of the string.

        Python's ``%`` yields a non-negative result, so no extra ``+ MOD``
        correction is needed after the subtraction.
        """
        return (self.h[r] - self.h[l - 1] * self.p[r - l + 1] % self.MOD) % self.MOD


h = sh.get_hash(l, r)

// 注意下标映射：哈希数组 h 的下标从 1 开始，而题目中给出的字符串下标多从 0 开始；把字符串下标转换成 h 的下标时，所有下标都加 1 即可。

// c++

// P可以取1331和131
// 取模的话MOD 可以有1e9 + 7 和1e9 + 9
// 双哈希：从上面的 P 和 MOD 中选取两组不同的 (P, MOD) 分别计算哈希，两个哈希值都相等时才认为两个子串相等。

// Hash of the 1-based closed range [l, r], derived from the prefix hashes h[]
// and the powers p[] of the base. No explicit modulus is applied here; `ull`
// is presumably unsigned long long, in which case the arithmetic wraps
// modulo 2^64 -- confirm against the typedef.
ull get(int l,int r){
    // With an explicit modulus this becomes:
    // ((h[r] - h[l-1]*p[r-l+1] % MOD) + MOD) % MOD
    return h[r]-h[l-1]*p[r-l+1];
}

p[0] = 1;
for(int i = 1;i<=n;i++){
    h[i] = h[i-1]*P+str[i];
    p[i] = p[i-1]*P;
}

class StringHash:
    """Polynomial rolling hash of a string with constant-time range queries.

    ``h[i]`` is the hash of the first ``i`` characters and ``p[i]`` is
    ``BASE ** i % MOD``.  Each character contributes ``ord(ch) * 2`` as its
    digit value.  Both tables are allocated with ten spare trailing slots.
    """

    def __init__(self, s: str):
        """Build the prefix-hash and power tables for ``s``."""
        self.MOD = 10**13 + 7
        self.BASE = 131
        size = len(s) + 10
        self.h = [0] * size
        self.p = [1] * size
        for pos, ch in enumerate(s, start=1):
            self.p[pos] = self.p[pos - 1] * self.BASE % self.MOD
            self.h[pos] = (self.h[pos - 1] * self.BASE + ord(ch) * 2) % self.MOD

    def get_hash(self, l, r):
        """Return the hash of ``s[l..r]`` (0-based, both ends inclusive)."""
        length = r - l + 1
        shifted_prefix = self.h[l] * self.p[length] % self.MOD
        return (self.h[r + 1] - shifted_prefix) % self.MOD

class Solution:
    def findRepeatedDnaSequences(self, s: str) -> List[str]:
        """Return every 10-character substring of ``s`` that occurs more than once.

        Windows are compared by their ``StringHash`` value.  A substring is
        appended to the result exactly when its hash is seen for the second
        time, so each repeated sequence is reported once, in order of its
        second occurrence.
        """
        window = 10
        hasher = StringHash(s)
        seen = Counter()
        repeated = []

        # When len(s) < 10 the range is empty and nothing is reported.
        for start in range(len(s) - window + 1):
            key = hasher.get_hash(start, start + window - 1)
            seen[key] += 1
            if seen[key] == 2:
                repeated.append(s[start:start + window])

        return repeated

#include<iostream>
#include<cstdio>
#include<algorithm>
#include<cstring>

using namespace std;

typedef unsigned long long ULL;

const int N = 1e5 + 10, P = 131;

int n, m;
char str[N];
ULL h[N], p[N];

// Hash of the 1-based closed range [l, r] of str, computed from the prefix
// hashes h[] and the base powers p[]. ULL arithmetic wraps modulo 2^64, which
// serves as the implicit modulus.
ULL get(int l, int r)
{
    const int len = r - l + 1;
    const ULL shifted_prefix = h[l - 1] * p[len];
    return h[r] - shifted_prefix;
}

int main()
{
    cin >> n >> m;
    cin >> str + 1;
    
    p[0] = 1;
    for(int i = 1; i <= n; i ++ )
    {
        h[i] = h[i - 1] * P + str[i];
        p[i] = p[i - 1] * P;
    }
    
    while(m -- )
    {
        int l1, r1, l2, r2;
        cin >> l1 >> r1 >> l2 >> r2;
        if(get(l1, r1) == get(l2, r2)) cout << "Yes" << endl;
        else cout << "No" << endl;
    }
    
    return 0;
}

#include<iostream>
#include<cstdio>
#include<algorithm>
#include<cstring>
#include<unordered_set>

using namespace std;

typedef unsigned long long ULL;

const int N = 2e5 + 10, P = 131;

char str1[N], str2[N];
ULL h[N], p[N];

// Hash of the 1-based closed range [l, r], derived from the prefix hashes h[]
// and the base powers p[]; ULL arithmetic wraps modulo 2^64.
ULL get(int l, int r) {
    const ULL scaled_prefix = p[r - l + 1] * h[l - 1];
    const ULL whole = h[r];
    return whole - scaled_prefix;
}

// Reads two words (stored from index 1), then slides a window of length
// n = |str1| over str2. Every window whose letter counts match those of str1
// contributes its hash to a set; the number of distinct hashes is printed.
// Letters are indexed as ch - 'a', so lowercase input is assumed -- TODO confirm.
int main() {
    scanf("%s%s", str1 + 1, str2 + 1);
    int n = strlen(str1 + 1), m = strlen(str2 + 1);

    // Prefix hashes and base powers for str2.
    p[0] = 1;
    for (int i = 1; i <= m; i++) {
        h[i] = h[i - 1] * P + str2[i];
        p[i] = p[i - 1] * P;
    }

    int cnt1[26] = {0};
    int cnt2[26] = {0};

    // Count only the real characters str1[1..n]. Walking the whole buffer would
    // also visit the unused slot 0 and the zero padding, and '\0' - 'a' is a
    // negative (out-of-bounds) index.
    for (int i = 1; i <= n; i++) cnt1[str1[i] - 'a']++;

    // True when the current window has exactly the same letter counts as str1.
    auto check = [&]() {
        for (int i = 0; i < 26; i++) {
            if (cnt1[i] != cnt2[i]) return false;
        }
        return true;
    };

    unordered_set<ULL> hash;

    // i is the right end of the window, j its left end (both 1-based).
    for (int i = 1, j = 1; i <= m; i++) {
        cnt2[str2[i] - 'a']++;
        if (i < n) continue;  // window not yet n characters long
        if (check()) hash.insert(get(j, i));
        cnt2[str2[j++] - 'a']--;  // drop the leftmost character before sliding
    }

    cout << hash.size() << '\n';

    return 0;
}

#include<iostream>
#include<cstdio>
#include<cstring>
#include<algorithm>
#include<unordered_set>

using namespace std;

typedef unsigned long long ULL;

const int N = 2e5 + 10, P = 131;
bool f[N];

// Reads whitespace-separated words until a lone ".", storing the hash of each
// word taken from its last character to its first. The remaining input tokens
// are concatenated into one string. f[i] marks that the first i characters can
// be split into stored words; the largest such i is printed.
int main() {
    unordered_set<ULL> hash;

    string str;
    // Stop on "." or on a failed read, so input that ends without the
    // terminator cannot loop forever.
    while (cin >> str && str != ".") {
        ULL h = 0;
        for (int i = static_cast<int>(str.size()) - 1; i >= 0; i--) {
            h = h * P + str[i];
        }
        hash.insert(h);
    }

    str.clear();
    string line;
    while (cin >> line) str += line;

    const int len = static_cast<int>(str.size());
    int res = 0;
    f[0] = true;
    for (int i = 1; i <= len; i++) {
        ULL h = 0;
        // Extend the candidate word leftwards from position i, at most 10
        // characters (presumably the maximum word length -- confirm against the
        // problem statement). The j > 0 guard keeps str[j - 1] and f[j - 1]
        // inside their bounds when i < 10.
        for (int j = i; j > 0 && j > i - 10; j--) {
            h = h * P + str[j - 1];
            if (hash.count(h) && f[j - 1]) {
                f[i] = true;
                res = i;
                break;
            }
        }
    }

    cout << res << '\n';

    return 0;
}

posted @ 2023-11-05 09:17 深渊之巅阅读(50) 评论(0) 收藏举报

刷新页面返回顶部

zk6696

字符串哈希

公告