字符串匹配经典问题整理
KMP算法
class Solution:
    """Find the first occurrence of a pattern in a text with KMP."""

    def strStr(self, s: str, pattern: str) -> int:
        """Return the index of the first occurrence of ``pattern`` in ``s``.

        Args:
            s: Text to search in.
            pattern: Substring to look for.

        Returns:
            The smallest ``i`` with ``s[i:i + len(pattern)] == pattern``,
            ``0`` when ``pattern`` is empty, or ``-1`` when there is no match.
        """
        if not pattern:
            return 0
        size = len(pattern)

        # ne[j] is the length of the longest proper border of pattern[:j],
        # i.e. the pattern position to retry after a mismatch at position j.
        # ne[0] holds a -1 sentinel; both loops below stop at 0 before
        # ever following it.
        ne = [0] * size
        ne[0] = -1
        k = 0
        for i in range(2, size):
            while k != 0 and pattern[k] != pattern[i - 1]:
                k = ne[k]
            if pattern[i - 1] == pattern[k]:
                k += 1
                ne[i] = k

        j = 0  # number of pattern characters currently matched
        for i, ch in enumerate(s):
            while j > 0 and pattern[j] != ch:
                j = ne[j]
            if ch == pattern[j]:
                j += 1
            if j == size:
                return i - size + 1
        return -1
2、找出数组中的美丽下标 II -- 匹配多个下标
class Solution:
    """Find occurrences of one pattern that lie close to another pattern."""

    def beautifulIndices(self, s: str, a: str, b: str, k: int) -> list[int]:
        """Return the start indices of ``a`` in ``s`` that are near some ``b``.

        An index ``i`` is reported when ``a`` occurs in ``s`` at ``i`` and
        ``b`` occurs in ``s`` at some ``j`` with ``abs(i - j) <= k``.

        Args:
            s: Text to search in.
            a: Pattern whose occurrences are reported.
            b: Pattern that must occur within distance ``k``.
            k: Maximum allowed distance between the two start indices.

        Returns:
            The qualifying start indices of ``a`` in increasing order.
        """
        from bisect import bisect_left

        def find_all(text, pattern):
            """Return every start index of ``pattern`` in ``text`` (KMP)."""
            size = len(pattern)
            if size == 0:
                # The empty pattern matches at every position, including
                # the end of the text.
                return list(range(len(text) + 1))

            # pi[i]: length of the longest proper border of pattern[:i + 1].
            pi = [0] * size
            j = 0
            for i in range(1, size):
                while j and pattern[i] != pattern[j]:
                    j = pi[j - 1]
                if pattern[i] == pattern[j]:
                    j += 1
                pi[i] = j

            hits = []
            j = 0
            for i, ch in enumerate(text):
                while j and pattern[j] != ch:
                    j = pi[j - 1]
                if pattern[j] == ch:
                    j += 1
                if j == size:
                    hits.append(i - size + 1)
                    # Fall back to the longest border so that overlapping
                    # occurrences are still found.
                    j = pi[j - 1]
            return hits

        starts_a = find_all(s, a)
        starts_b = find_all(s, b)  # ascending, so it can be bisected

        result = []
        for idx in starts_a:
            # First occurrence of b at or after idx - k; it qualifies when
            # it does not lie beyond idx + k.
            pos = bisect_left(starts_b, idx - k)
            if pos < len(starts_b) and starts_b[pos] <= idx + k:
                result.append(idx)
        return result
3、将单词恢复初始状态所需的最短时间 II -- 自身匹配
class Solution:
    """Self-matching with KMP: walk the borders of a word."""

    def minimumTimeToInitialState(self, word: str, kk: int) -> int:
        """Return the smallest positive step count that restores ``word``.

        Each step drops the first ``kk`` characters and appends ``kk``
        freely chosen characters. After ``t`` steps the word can be restored
        when ``word[t * kk:]`` is a prefix of ``word`` (trivially true once
        ``t * kk >= len(word)``).

        Args:
            word: The initial word.
            kk: Number of characters removed and appended per step.

        Returns:
            The minimum number of steps, or ``0`` for an empty ``word``.
        """
        n = len(word)
        if n == 0:
            return 0

        # pi[i]: length of the longest proper border of word[:i + 1].
        pi = [0] * n
        j = 0
        for i in range(1, n):
            while j and word[i] != word[j]:
                j = pi[j - 1]
            if word[i] == word[j]:
                j += 1
            pi[i] = j

        # Visit the borders of the whole word from longest to shortest, so
        # the amount removed (n - border) grows and the first multiple of kk
        # found is the minimum.
        border = pi[-1]
        while border:
            removed = n - border
            if removed % kk == 0:
                return removed // kk
            border = pi[border - 1]

        # No usable border: everything must be replaced, ceil(n / kk) steps.
        return -(-n // kk)
4、KMP 算法不适用于带“通配”性质的匹配(例如允许先替换字符再进行匹配),这类问题的正确解法是暴力匹配:
class Solution:
    """Brute-force substring matching where characters may be replaced."""

    def matchReplacement(self, s: str, sub: str, m: list[list[str]]) -> bool:
        """Return whether ``sub`` can be made a substring of ``s``.

        Args:
            s: Text to search in.
            sub: Pattern; each of its characters may be kept or replaced.
            m: Pairs ``[old, new]`` meaning a character ``old`` of ``sub``
                may be replaced by ``new``.

        Returns:
            ``True`` when some window of ``s`` of length ``len(sub)`` agrees
            with ``sub`` at every position, either literally or through an
            allowed replacement; ``False`` otherwise.
        """
        # old character -> set of characters it may turn into
        allowed = {}
        for old, new in m:
            allowed.setdefault(old, set()).add(new)

        for start in range(len(s) - len(sub) + 1):
            if all(
                s[start + j] == want or s[start + j] in allowed.get(want, ())
                for j, want in enumerate(sub)
            ):
                return True
        return False
5、树形kmp -- 二叉树中的链表
# Definition for singly-linked list.
# class ListNode:
# def __init__(self, val=0, next=None):
# self.val = val
# self.next = next
# Definition for a binary tree node.
# class TreeNode:
# def __init__(self, val=0, left=None, right=None):
# self.val = val
# self.left = left
# self.right = right
class Solution:
    """KMP on a tree: match a linked list against downward tree paths."""

    def isSubPath(self, head: "ListNode", root: "TreeNode") -> bool:
        """Return whether the list is a downward path somewhere in the tree.

        The list values are treated as a KMP pattern. The match state is
        carried from each tree node to its children, so every node is
        processed once.

        Args:
            head: First node of a singly linked list (``val``/``next``), or
                ``None`` for an empty list.
            root: Root of a binary tree (``val``/``left``/``right``), or
                ``None`` for an empty tree.

        Returns:
            ``True`` when some parent-to-child path contains the list values
            consecutively; an empty list matches trivially.
        """
        pattern = []
        node = head
        while node:
            pattern.append(node.val)
            node = node.next
        size = len(pattern)
        if size == 0:
            return True
        if not root:
            return False

        # pi[i]: length of the longest proper border of pattern[:i + 1].
        pi = [0] * size
        j = 0
        for i in range(1, size):
            while j and pattern[i] != pattern[j]:
                j = pi[j - 1]
            if pattern[i] == pattern[j]:
                j += 1
            pi[i] = j

        # Explicit stack of (tree node, pattern values matched by the path
        # ending at its parent); avoids recursion on deep trees.
        stack = [(root, 0)]
        while stack:
            tree_node, matched = stack.pop()
            while matched and pattern[matched] != tree_node.val:
                matched = pi[matched - 1]
            if pattern[matched] == tree_node.val:
                matched += 1
            if matched == size:
                return True
            if tree_node.left:
                stack.append((tree_node.left, matched))
            if tree_node.right:
                stack.append((tree_node.right, matched))
        return False
6、无限重复kmp算法 -- 最大重复子字符串
class Solution:
    """KMP against an endlessly repeated word."""

    def maxRepeating(self, sequence: str, word: str) -> int:
        """Return the largest ``k`` such that ``word * k`` occurs in ``sequence``.

        The pattern is conceptually ``word`` repeated forever. Its failure
        table is extended only as far as the current match length needs, so
        the table never grows beyond ``len(sequence) + 1`` entries.

        Args:
            sequence: Text to search in.
            word: Word whose consecutive repetitions are counted.

        Returns:
            The maximum repetition count; ``0`` when ``word`` is empty or
            does not occur at all.
        """
        size = len(word)
        if size == 0:
            return 0

        # fail[j]: length of the longest proper border of the first j
        # characters of the infinite repetition of word. Entries 0 and 1 are
        # both 0 by definition.
        fail = [0, 0]

        def grow(limit):
            """Extend ``fail`` until ``fail[limit]`` exists."""
            while len(fail) <= limit:
                last = word[(len(fail) - 1) % size]  # newest prefix character
                k = fail[-1]
                while k and word[k % size] != last:
                    k = fail[k]
                if word[k % size] == last:
                    k += 1
                fail.append(k)

        best = 0
        matched = 0  # longest prefix of the repeated word ending at this char
        for ch in sequence:
            grow(matched)
            while matched and word[matched % size] != ch:
                matched = fail[matched]
            if word[matched % size] == ch:
                matched += 1
            # A matched prefix of this length contains matched // size whole
            # copies of word.
            best = max(best, matched // size)
        return best
7、重复的子字符串
class Solution:
    """Detect strings built by repeating a shorter block."""

    def repeatedSubstringPattern(self, s: str) -> bool:
        """Return whether ``s`` is some shorter block repeated at least twice.

        ``s`` has that structure exactly when it reappears inside ``s + s``
        at an offset other than ``0`` or ``len(s)``. Dropping the first and
        last character of the doubled string removes those two trivial
        alignments.

        Args:
            s: String to test.

        Returns:
            ``True`` when ``s`` occurs in ``(s + s)[1:-1]``. The empty string
            yields ``True`` because ``"" in ""`` holds.
        """
        doubled = s + s
        return s in doubled[1:-1]
8、旋转字符串
class Solution(object):
    """Check whether one string is a rotation of another."""

    def rotateString(self, A, B):
        """Return whether ``B`` can be obtained by rotating ``A``.

        Every rotation of ``A`` appears as a substring of ``A + A``, so it is
        enough to compare the lengths and do one substring test.

        Args:
            A: The original string.
            B: The candidate rotation.

        Returns:
            ``True`` when both strings have the same length and ``B`` occurs
            in ``A + A``; ``False`` otherwise.
        """
        if len(A) != len(B):
            return False
        return B in A + A
9、kmp+ 数位dp -- 找到所有好字符串
class Solution:
def findGoodStrings(self, n: int, s1: str, s2: str, evil: str) -> int:
ne = [0] * len(evil)
ne[0], k = -1, 0
for i in range(2, len(evil)):
while k != 0 and evil[k] != evil[i -1]:
k = ne[k]
if evil[i - 1] == evil[k]:
ne[i] = k + 1
k += 1
@cache
def f(s, i: int, is_limit: bool, j) -> int:
if j == len(evil): return 0
if i == n: return 1
res = 0
up = (ord(s[i]) - ord('a')) if is_limit else 25
for d in range(0, up + 1): # 枚举要填入的数字 d
c = chr(d + ord('a'))
nj = j
while nj > 0 and evil[nj] != c:
nj = ne[nj]
if evil[nj] == c:
nj += 1
res += f(s, i + 1, is_limit and d == up, nj)
return res % (10 ** 9 + 7)
return (f(s2, 0, True, 0) - f(s1, 0, True, 0) + (1 if evil not in s1 else 0)) % (10 ** 9 + 7)
10、最长快乐前缀
class Solution:
    """Longest proper prefix that is also a suffix."""

    def longestPrefix(self, s: str) -> str:
        """Return the longest proper prefix of ``s`` that is also a suffix.

        Args:
            s: Input string.

        Returns:
            The longest border of ``s`` (a prefix that is also a suffix and
            is shorter than ``s``), or ``""`` when there is none or ``s`` is
            empty.
        """
        n = len(s)
        if n == 0:
            return ""

        # pi[i]: length of the longest proper border of s[:i + 1].
        pi = [0] * n
        j = 0
        for i in range(1, n):
            while j and s[i] != s[j]:
                j = pi[j - 1]
            if s[i] == s[j]:
                j += 1
            pi[i] = j
        return s[:pi[-1]]
class Solution:
    """Repeatedly delete the leftmost occurrence of a substring (KMP + stack)."""

    def removeOccurrences(self, s: str, part: str) -> str:
        """Return ``s`` after removing occurrences of ``part`` until none is left.

        The text is scanned once from left to right. Kept characters sit on
        a stack together with the KMP match length reached after each of
        them, so that after a deletion matching resumes from the state of
        the character that is now on top.

        Args:
            s: Text to process.
            part: Substring to delete; an empty ``part`` leaves ``s`` as is.

        Returns:
            The remaining string.
        """
        m = len(part)
        if m == 0:
            return s

        # pi[i]: length of the longest proper border of part[:i + 1].
        pi = [0] * m
        j = 0
        for i in range(1, m):
            while j and part[i] != part[j]:
                j = pi[j - 1]
            if part[i] == part[j]:
                j += 1
            pi[i] = j

        kept = []      # characters that survive so far
        matched = [0]  # matched[t]: match length after the t-th kept char
        for ch in s:
            kept.append(ch)
            j = matched[-1]
            while j and ch != part[j]:
                j = pi[j - 1]
            if ch == part[j]:
                j += 1
            matched.append(j)
            if j == m:
                # A full occurrence ends here: drop it and its states.
                del kept[-m:]
                del matched[-m:]
        return "".join(kept)
12、扩展kmp(z函数)-- 构造字符串的总得分和
class Solution:
    """Extended KMP (Z-function)."""

    def sumScores(self, s: str) -> int:
        """Return the sum over all suffixes of their common prefix with ``s``.

        For every start position ``i`` the score is the length of the longest
        common prefix of ``s`` and ``s[i:]``; the whole string contributes
        ``len(s)``.

        Args:
            s: Input string.

        Returns:
            The total of all scores; ``0`` for an empty string.
        """
        n = len(s)
        z = [0] * n
        total = n  # score of the suffix starting at 0
        # [left, right) is the match window reaching furthest to the right:
        # s[left:right] == s[:right - left].
        left = right = 0
        for i in range(1, n):
            # Inside the window, reuse the value of the mirrored position,
            # capped so it does not claim anything past the window.
            k = min(z[i - left], right - i) if i < right else 0
            while i + k < n and s[k] == s[i + k]:
                k += 1
            if i + k > right:
                left, right = i, i + k
            z[i] = k
            total += k
        return total
字典树
class Trie:
    """Prefix tree over the lowercase letters ``a``-``z``.

    Each node owns a 26-slot ``children`` list and an ``isEnd`` flag that
    marks the end of an inserted word.
    """

    def __init__(self):
        self.children = [None] * 26  # one slot per letter, None when absent
        self.isEnd = False           # True when an inserted word ends here

    @staticmethod
    def _slot(ch):
        """Return the child slot for ``ch``, or ``-1`` if it is not ``a``-``z``.

        The range check matters: a negative offset would otherwise index the
        list from its end and alias an unrelated letter.
        """
        idx = ord(ch) - ord("a")
        return idx if 0 <= idx < 26 else -1

    def insert(self, word: str) -> None:
        """Insert ``word`` into the trie.

        Raises:
            ValueError: If ``word`` contains a character outside ``a``-``z``;
                the trie is left unchanged in that case.
        """
        slots = [self._slot(ch) for ch in word]
        if -1 in slots:
            raise ValueError("Trie only supports lowercase letters a-z")
        node = self
        for idx in slots:
            if node.children[idx] is None:
                node.children[idx] = Trie()
            node = node.children[idx]
        node.isEnd = True

    def searchPrefix(self, prefix: str):
        """Return the node reached by following ``prefix``, or ``None``."""
        node = self
        for ch in prefix:
            idx = self._slot(ch)
            if idx < 0 or node.children[idx] is None:
                return None
            node = node.children[idx]
        return node

    def search(self, word: str) -> bool:
        """Return whether ``word`` itself was inserted."""
        node = self.searchPrefix(word)
        return node is not None and node.isEnd

    def startsWith(self, prefix: str) -> bool:
        """Return whether some inserted word starts with ``prefix``."""
        return self.searchPrefix(prefix) is not None
class WordDictionary:
    """Trie-backed word set whose lookups accept ``.`` as a wildcard.

    Every node is itself a ``WordDictionary`` with a 26-slot ``ch`` list of
    children and an ``isEnd`` flag marking the end of an added word.
    """

    def __init__(self):
        """Create an empty node."""
        self.isEnd = False
        self.ch = [None] * 26

    @staticmethod
    def _slot(c):
        """Return the child slot for ``c``, or ``-1`` if it is not ``a``-``z``.

        The range check prevents negative offsets from indexing the list
        from its end and aliasing an unrelated letter.
        """
        idx = ord(c) - ord('a')
        return idx if 0 <= idx < 26 else -1

    def addWord(self, word: str) -> None:
        """Add ``word`` to the dictionary.

        Raises:
            ValueError: If ``word`` contains a character outside ``a``-``z``;
                nothing is added in that case.
        """
        slots = [self._slot(c) for c in word]
        if -1 in slots:
            raise ValueError("WordDictionary only stores lowercase letters a-z")
        p = self
        for idx in slots:
            if p.ch[idx] is None:
                p.ch[idx] = WordDictionary()
            p = p.ch[idx]
        p.isEnd = True

    def search(self, word: str) -> bool:
        """Return whether an added word matches ``word``.

        A ``.`` in ``word`` matches any single letter.
        """
        return self.searchSub(self, word)

    def searchSub(self, p, word, start=0):
        """Match ``word[start:]`` against the subtree rooted at ``p``.

        Args:
            p: Node to start from.
            word: Full query; ``.`` matches any single letter.
            start: Index of the first character still to be matched. Passing
                an index instead of a slice avoids copying the word at every
                wildcard branch.

        Returns:
            ``True`` when some path below ``p`` spells the rest of the query
            and ends on a word boundary.
        """
        for i in range(start, len(word)):
            c = word[i]
            if c == '.':
                # Try every existing child; stop at the first success.
                return any(
                    child is not None and self.searchSub(child, word, i + 1)
                    for child in p.ch
                )
            idx = self._slot(c)
            if idx < 0 or p.ch[idx] is None:
                return False
            p = p.ch[idx]
        return p.isEnd
# Your WordDictionary object will be instantiated and called as such:
# obj = WordDictionary()
# obj.addWord(word)
# param_2 = obj.search(word)
3、统计前后缀下标对 II -- 双字符字典树
class Trie:
    """Trie node keyed by two-character strings, with a pass-through counter."""

    def __init__(self):
        self.child = {}  # key -> child node
        self.count = 0   # number of insertions that passed through this node

    def add(self, cc):
        """Step to the child for ``cc``, creating it if needed, and count the visit.

        Returns:
            The child node, whose ``count`` has been incremented.
        """
        node = self.child.get(cc)
        if node is None:
            node = self.child[cc] = Trie()
        node.count += 1
        return node

    def get(self, cc):
        """Return the child for ``cc``, or ``None`` when there is none."""
        return self.child.get(cc)


class Solution:
    """Count pairs where an earlier word is both prefix and suffix of a later one."""

    def countPrefixSuffixPairs(self, words: list[str]) -> int:
        """Return the number of pairs ``i < j`` with ``words[i]`` bordering ``words[j]``.

        A word is spelled into the trie as the sequence of pairs
        ``word[t] + word[-t - 1]``. A word ``w`` is then both a prefix and a
        suffix of ``v`` exactly when the pair sequence of ``w`` is a prefix
        of the pair sequence of ``v``.

        Args:
            words: The list of words.

        Returns:
            The number of qualifying index pairs.
        """
        root = Trie()
        total = 0
        # Go from the last word to the first, so that the trie always holds
        # exactly the words with a larger index than the current one.
        for word in reversed(words):
            keys = [front + back for front, back in zip(word, reversed(word))]

            node = root
            for key in keys:
                node = node.get(key)
                if node is None:
                    break
            if node is not None:
                total += node.count

            node = root
            for key in keys:
                node = node.add(key)
        return total
class Solution:
    """Count distinct subarrays with a trie of value sequences."""

    def countDistinct(self, nums: list[int], k: int, p: int) -> int:
        """Return the number of distinct subarrays with at most ``k`` multiples of ``p``.

        Every qualifying subarray is inserted into a trie keyed by its
        values; each newly created trie node corresponds to one subarray
        content not seen before.

        Args:
            nums: The array.
            k: Maximum number of elements divisible by ``p`` per subarray.
            p: The divisor.

        Returns:
            The number of distinct non-empty qualifying subarrays.
        """
        root = {}
        distinct = 0
        size = len(nums)
        for start in range(size):
            node = root
            divisible = 0
            for end in range(start, size):
                value = nums[end]
                if value % p == 0:
                    divisible += 1
                if divisible > k:
                    # Longer subarrays from this start only add more.
                    break
                child = node.get(value)
                if child is None:
                    child = node[value] = {}
                    distinct += 1
                node = child
        return distinct
5、数组中两个数的最大异或值 -- 异或反向匹配
class Tire:
    """Binary trie over the low 32 bits of integers, most significant bit first.

    The class name is kept as spelled in the original notes because the
    solution below refers to it.
    """

    def __init__(self):
        self.ch = [None] * 2  # children for bit 0 and bit 1

    def build(self, nums):
        """Insert every number of ``nums``, walking bit 31 down to bit 0."""
        for num in nums:
            node = self
            for shift in range(31, -1, -1):
                bit = (num >> shift) & 1
                if node.ch[bit] is None:
                    node.ch[bit] = Tire()
                node = node.ch[bit]


class Solution:
    """Maximum XOR of two array elements via a binary trie."""

    def findMaximumXOR(self, nums: list[int]) -> int:
        """Return the largest ``nums[i] ^ nums[j]`` over all pairs.

        For each number the trie is descended from the top bit, preferring
        at every level the child holding the opposite bit, since that sets
        the corresponding bit of the XOR.

        Args:
            nums: The numbers; bits 31..0 of each are used.

        Returns:
            The maximum XOR value, or ``0`` for an empty list.
        """
        trie = Tire()
        trie.build(nums)
        best = 0
        for num in nums:
            node = trie
            value = 0
            for shift in range(31, -1, -1):
                bit = (num >> shift) & 1
                opposite = bit ^ 1
                if node.ch[opposite] is not None:
                    node = node.ch[opposite]
                    value = value << 1 | 1
                else:
                    # Every stored path has full depth, so when the opposite
                    # child is missing the same-bit child exists.
                    node = node.ch[bit]
                    value <<= 1
            best = max(best, value)
        return best
字符串哈希
P = 31  # base of the polynomial string hash
MOD = 10 ** 9 + 7  # modulus of the polynomial string hash


class Solution:
    """Palindrome test for the post-order string of every subtree."""

    def findAnswer(self, parent: list[int], s: str) -> list[bool]:
        """Return, per node, whether its subtree's DFS string is a palindrome.

        The DFS string of a node is the concatenation of the DFS strings of
        its children in increasing index order, followed by the node's own
        character. Each node keeps that string with a forward hash and a
        hash of its reverse. Equal hashes act as a cheap filter before the
        exact comparison, so the answer never depends on hash collisions.

        Only nodes reachable from node ``0`` are evaluated; any others stay
        ``False``.

        Args:
            parent: ``parent[i]`` is the parent of node ``i``; values below
                ``0`` mark a node without parent.
            s: ``s[i]`` is the character of node ``i``.

        Returns:
            A list with one boolean per node.
        """
        n = len(parent)
        res = [False] * n
        if n == 0:
            return res

        children = [[] for _ in range(n)]
        for node, par in enumerate(parent):
            if par > -1:
                children[par].append(node)  # appended in increasing order

        # Pre-order with an explicit stack; visiting it in reverse handles
        # every child before its parent without recursion.
        order = []
        stack = [0]
        while stack:
            node = stack.pop()
            order.append(node)
            stack.extend(children[node])

        # node -> (dfs string, forward hash, reversed-string hash, length);
        # an entry is removed as soon as the parent has consumed it.
        summary = {}
        for node in reversed(order):
            parts = []
            fwd = bwd = length = 0
            for child in children[node]:
                text, child_fwd, child_bwd, size = summary.pop(child)
                parts.append(text)
                # Forward: append the child's block to the right.
                fwd = (fwd * pow(P, size, MOD) + child_fwd) % MOD
                # Reversed: the child's reversed block goes to the left of
                # what has been accumulated so far.
                bwd = (bwd + child_bwd * pow(P, length, MOD)) % MOD
                length += size
            code = ord(s[node]) - ord('a')
            parts.append(s[node])
            fwd = (fwd * P + code) % MOD
            bwd = (bwd + code * pow(P, length, MOD)) % MOD
            length += 1
            text = "".join(parts)
            res[node] = fwd == bwd and text == text[::-1]
            summary[node] = (text, fwd, bwd, length)
        return res
字符串下标索引
class Solution:
    """Answer an index query on a processed string without building it."""

    def processStr(self, s: str, k: int) -> str:
        """Return the character at index ``k`` of the result of processing ``s``.

        The instructions are read left to right: ``*`` removes the last
        character (if any), ``#`` appends a copy of the current result to
        itself, ``%`` reverses the result, and any other character is
        appended as a letter. Only the result length after each instruction
        is stored; the query index is then traced backwards through the
        instructions.

        Args:
            s: The instruction string.
            k: Zero-based index into the final result.

        Returns:
            The requested character, or ``'.'`` when ``k`` is not smaller
            than the length of the final result (including when ``s`` is
            empty).
        """
        n = len(s)
        if n == 0:
            return '.'

        size = [0] * n  # size[i]: result length after instruction i
        sz = 0
        for i, c in enumerate(s):
            if c == '*':
                sz = max(sz - 1, 0)
            elif c == '#':
                sz *= 2
            elif c != '%':  # c is a letter
                sz += 1
            size[i] = sz

        if k >= size[-1]:  # index out of range
            return '.'

        # Undo the instructions from last to first, keeping k pointed at the
        # same character in the shorter intermediate result.
        for i in range(n - 1, -1, -1):
            c = s[i]
            sz = size[i]
            if c == '#':
                if k >= sz // 2:  # k lies in the duplicated right half
                    k -= sz // 2
            elif c == '%':
                k = sz - 1 - k  # position of the same letter before reversing
            elif c != '*' and k == sz - 1:  # this letter is the answer
                return c

        # Not expected to be reached once k has passed the range check.
        return '.'
浙公网安备 33010602011771号