字符串多模匹配---最左优先

#pragma once

#include <string>
#include <unordered_map>
#include <vector>

struct Node{
int fail;
std::string change_word;
std::string word;
using TransMap = std::unordered_map<char, int>;
TransMap trans;
};
class ChangeWordTrie {
public:
inline void Init(int capacity = 100000) {
trie_.reserve(capacity);
rt_ = NewNode();
}

void Insert(const std::string& text, const std::string& word) {
int u = rt_;
for (size_t i = 0; i < text.size(); ++i) {
char c = text[i];
if (!trie_[u].trans.count(c)) trie_[u].trans[c] = NewNode();
u = trie_[u].trans[c];
}
trie_[u].word = word;
trie_[u].change_word = text;
}
void Build() {
std::queue<int> Q;
trie_[rt_].fail = rt_;
for (const auto& p : trie_[rt_].trans) {
char c = p.first;
int nxt = trie_[rt_].trans[c];
trie_[nxt].fail = rt_;
Q.push(nxt);
}

// BFS 构建 Trie图
while (!Q.empty()) {
int u = Q.front();
Q.pop();
for (const auto& p : trie_[u].trans) {
char c = p.first;
int u_fail = trie_[u].fail;
int nxt = trie_[u].trans[c];
while (u_fail != rt_ && !trie_[u_fail].trans.count(c)) {
u_fail = trie_[u_fail].fail;
}
trie_[nxt].fail = trie_[u_fail].trans.count(c) ? trie_[u_fail].trans[c] : rt_;
Q.push(nxt);
}
}
}

std::string Search(const std::string& text) {
/* TODO : 通过合并优化last数组遍历单词节点的效率 保证复杂度 */

std::string result = text;
int u = rt_;
int change = 0; //替换变种词后源字符串长度的改变
for (size_t i = 0; i < text.size(); ++i) {
char c = text[i];
while (u != rt_ && !trie_[u].trans.count(c)) {
u = trie_[u].fail;
}
if (trie_[u].trans.count(c)) u = trie_[u].trans[c];
if (!trie_[u].word.empty()) {
result = result.replace(i - trie_[u].change_word.length() + 1 - change, trie_[u].change_word.length(), trie_[u].word);
change += trie_[u].change_word.length() - trie_[u].word.length();
u = rt_;
}
}
return result;
}

private:
int rt_; // Root of trie
std::vector<Node> trie_;

int NewNode() {
trie_.emplace_back();
return trie_.size() - 1;
}
};
posted @ 2021-12-30 15:49  花花公子1234  阅读(59)  评论(0)    收藏  举报