模式匹配的朴素算法与KMP算法及代码实现

模式匹配的朴素算法在匹配过程中每次只将模式串向后移动一位。更高效的KMP算法则在出现字符不相等时,利用已经求得的“部分匹配表”将模式串一次向右滑动若干位,再开始下一趟匹配。例如,对于主串“acabaabaabcac”和模式串“abaabcac”,利用KMP算法进行匹配的过程是:

 

#!/usr/bin/env python
# -*- coding:utf-8 -*-

"""
KMP算法工具
"""

class KPM_Matcher():
    """Pattern search over sequences using the naive and the KMP algorithm.

    Both search methods answer the same question -- does ``target_array``
    occur as a contiguous run inside ``measured_array`` -- and return a
    boolean. Inputs are typically strings, but any indexable sequences with
    comparable elements work.
    """

    def __init__(self):
        # Lengths of the text and the pattern from the most recent search
        # call; None until a search has been run.
        self.__measured_array = None
        self.__target_array = None

    def kmp_match(self, measured_array, target_array):
        """Search for ``target_array`` in ``measured_array`` with KMP.

        :param measured_array: the sequence to search in (the text).
        :param target_array: the sequence to search for (the pattern).
        :return: True if the pattern occurs in the text, otherwise False.
                 An empty pattern always matches.
        """
        pattern_len = len(target_array)
        self.__measured_array = len(measured_array)
        self.__target_array = pattern_len
        if pattern_len == 0:
            return True

        table = self.partial_table(target_array)
        matched = 0  # number of pattern elements currently matched
        for item in measured_array:
            # On a mismatch fall back to the longest shorter border instead
            # of restarting, so no text element is ever re-read.
            while matched and item != target_array[matched]:
                matched = table[matched - 1]
            if item == target_array[matched]:
                matched += 1
                if matched == pattern_len:
                    return True
        return False

    def partial_table(self, target_array):
        """Build the KMP partial match table for ``target_array``.

        Entry ``i`` is the length of the longest proper prefix of
        ``target_array[:i + 1]`` that is also a suffix of it.

        :param target_array: the pattern sequence.
        :return: list of ints, one per pattern element; ``[0]`` for an
                 empty pattern.
        """
        table = [0]
        border = 0  # border length of the prefix ending at the previous index
        for i in range(1, len(target_array)):
            # Shrink the candidate border until it can be extended by
            # target_array[i], or until no border is left.
            while border and target_array[i] != target_array[border]:
                border = table[border - 1]
            if target_array[i] == target_array[border]:
                border += 1
            table.append(border)
        return table

    def naive_match(self, measured_array, target_array):
        """Search by comparing the pattern at every possible offset.

        :param measured_array: the sequence to search in (the text).
        :param target_array: the sequence to search for (the pattern).
        :return: True if the pattern occurs in the text, otherwise False.
        """
        text_len = len(measured_array)
        pattern_len = len(target_array)
        self.__measured_array = text_len
        self.__target_array = pattern_len
        # The range is empty when the pattern is longer than the text.
        return any(
            measured_array[start:start + pattern_len] == target_array
            for start in range(text_len - pattern_len + 1)
        )


if __name__ == "__main__":
    # Demo: run each search method once and print its boolean result.
    matcher = KPM_Matcher()
    demo_cases = (
        (matcher.kmp_match, "123456789", "4545"),
        (matcher.naive_match, "123456789", "123"),
    )
    for search, text, pattern in demo_cases:
        print(search(text, pattern))

  

posted @ 2018-04-03 10:34  lzp的bky  阅读(262)  评论(0)  编辑  收藏  举报