基于 Trie 树实现一个 AC 自动机

基于 Trie 树实现一个 AC 自动机


#!/usr/bin/python
# -*- coding: utf-8 -*-



class Node:
    """One trie node; also serves as a state of the Aho-Corasick automaton."""

    def __init__(self):
        # Character on the edge leading into this node (set by Trie.insert).
        self.value = None
        # Outgoing edges as {char: Node}, and the number of inserted keys
        # that end exactly at this node.
        self.children, self.fre = {}, 0
        # Parent node and failure link; both stay unset until the trie
        # fills them in.
        self.father = self.fail = None


def CMP(a, b):
    """Compare two objects by their ``fre`` attribute, larger ``fre`` first.

    Returns a positive number when ``b.fre`` exceeds ``a.fre``, zero when
    they are equal and a negative number otherwise.
    NOTE(review): presumably meant as a cmp-style callback for sorting by
    descending frequency -- no caller is visible in this file.
    """
    second_count = b.fre
    first_count = a.fre
    return second_count - first_count


class Trie:
    """Trie of string keys that can be used as an Aho-Corasick automaton.

    Typical use: ``insert`` every pattern, call ``buildac3`` to compute the
    failure links, then call ``acfind`` to locate every occurrence of every
    pattern in a text.
    """

    def __init__(self):
        self.root = Node()
        self.choose = []  # not used by any method of this class
        self.__OpenCorrect__ = 0  # not read by any method of this class
        # True while keys have been inserted since the last buildac3() call,
        # i.e. while the failure links are missing or out of date.
        self._links_stale = True

    def insert(self, key):
        """Add the string ``key`` to the trie.

        Inserting the same key again only increases the ``fre`` counter of
        its final node. An empty key is counted on the root node and is
        never reported by ``acfind``.

        NOTE(review): the original comment asked for keys to be lower-case,
        but no check was ever performed; keys are stored exactly as given.
        """
        node = self.root
        for char in key:
            child = node.children.get(char)
            if child is None:
                child = Node()
                child.value = char
                child.father = node
                node.children[char] = child
            node = child
        node.fre += 1
        # New nodes have no failure link yet.
        self._links_stale = True

    def find_node(self, string):
        """Return the node reached by following ``string`` from the root.

        Returns ``None`` when the path does not exist, or when ``string``
        cannot be walked at all (not iterable, or unhashable items).
        """
        node = self.root
        try:
            for char in string:
                node = node.children[char]
        except (KeyError, TypeError):
            return None
        return node

    def buildac3(self):
        """Compute the failure link (``fail``) of every node.

        Nodes are visited breadth-first so that every node nearer to the
        root already has its failure link when a deeper node needs it.
        """
        from collections import deque

        root = self.root
        root.fail = None
        pending = deque([root])
        while pending:
            node = pending.popleft()
            for char, child in node.children.items():
                # Walk the parent's failure chain until a state with an
                # outgoing edge for ``char`` is found.
                fallback = node.fail
                while fallback is not None and char not in fallback.children:
                    fallback = fallback.fail
                if fallback is None:
                    child.fail = root
                else:
                    child.fail = fallback.children[char]
                pending.append(child)
        self._links_stale = False

    def _depth(self, node):
        """Return the number of edges between ``node`` and the root."""
        depth = 0
        while node.father is not None:
            depth += 1
            node = node.father
        return depth

    def acfind(self, content, verbose=True):
        """Find every occurrence of every inserted key inside ``content``.

        The text is scanned once. The failure links are (re)built first if
        keys were inserted since the last ``buildac3`` call.

        Args:
            content: the text to search.
            verbose: when true (the default), print each matched substring
                followed by the list of matches.

        Returns:
            A sorted list of ``(start, end)`` tuples, where both indexes
            are inclusive positions in ``content``.
        """
        if getattr(self, '_links_stale', True):
            self.buildac3()

        root = self.root
        state = root
        matches = []
        for index, char in enumerate(content):
            # Fall back until some state can consume ``char``.
            while state is not root and char not in state.children:
                state = state.fail
            state = state.children.get(char, root)

            # Every key ending at this position lies on the failure chain
            # of the current state.
            hit = state
            while hit is not root:
                if hit.fre > 0:
                    matches.append((index - self._depth(hit) + 1, index))
                hit = hit.fail

        matches.sort()
        if verbose:
            for start, end in matches:
                print(content[start:end + 1])
            print(matches)
        return matches

if __name__ == '__main__':
    # Demo: build an automaton for four overlapping patterns and search a
    # short text with it.
    trie = Trie()
    trie.__OpenCorrect__ = 1
    for pattern in ("she", "he", "her", "hers"):
        trie.insert(pattern)
    trie.buildac3()
    # acfind prints the matches it finds, so its return value is not
    # printed again here.
    trie.acfind('shers')

posted @ 2016-10-08 19:09  唯心不易  阅读(566)  评论(0)  编辑  收藏  举报