# kmp算法简明教程

 i = 0   1   2   3   4   5   6   7   8   9  10  11  12  13
s = a   b   a   a   c   a   b   a   a   a   b   a   a   b
p = a   b   a   a   b
j = 0   1   2   3   4

'''

'''
def string_index_of( pstr, pattern, pos = 0 ):
    """Find ``pattern`` in ``pstr`` with brute-force matching.

    The text is scanned from index ``pos``. On a mismatch the text pointer
    backs up to one position past the start of the current attempt and the
    pattern pointer restarts at 0.

    Args:
        pstr: The text to search in.
        pattern: The substring to look for.
        pos: Index in ``pstr`` at which the search starts.

    Returns:
        The index in ``pstr`` where the first match found begins, or -1 if
        there is no match. An empty ``pattern`` never enters the loop, so
        ``pos`` is returned.
    """
    str_len = len( pstr )
    pattern_len = len( pattern )
    str_index = pos
    pattern_index = 0
    while str_index < str_len and pattern_index < pattern_len:
        if pstr[ str_index ] == pattern[ pattern_index ]:
            str_index += 1
            pattern_index += 1
        else:
            # Backtrack: str_index - pattern_index is where this attempt
            # started, so the next attempt starts one character later.
            str_index = str_index - pattern_index + 1
            pattern_index = 0
    if pattern_index == pattern_len:
        # The whole pattern was consumed; the match starts pattern_len back.
        return str_index - pattern_index
    return -1

# Demo for the brute-force search. The hand-written search starts at offset 7,
# while str.find scans from the beginning, so the two printed indices can differ.
pstr = 'i am caochao, i love coding!'
pattern = 'ao'
print( string_index_of( pstr, pattern, 7 ), pstr.find( pattern ), sep = '\n' )

kmp算法步骤如下：

1，初始化i，j均为0，

2，依次往后比较s[i]与p[j]：若相等则i，j各自加1；否则保持i不变，令j=next[j]（记为k）。若某时刻求得j值为-1，则i，j也各自加1，然后继续匹配

3，重复步骤2，直到j等于p的长度（匹配成功，子串起始位置为i-j）或i到达s的末尾（匹配失败）

1，j=0时，next[j]=-1

2，next[j] = max{k|0<k<j且p[0,k-1]=p[j-k,j-1]}

3，其它情况，next[j]=0

p[0,k-1]=p[j-k,j-1]

1，若p[k]=p[j]，则表明在p中存在p[0,k]=p[j-k,j]，且不存在k'>k满足该关系，那么next[j+1]=k+1，即

next[j+1]=next[j]+1

2，若p[k]不等于p[j]，此时可把求next函数的过程看成模式匹配的过程，即p既是主串又是模式串。而在模式匹配过程中，此时应当让p[j]与p[next[k]]继续比较。

next[j+1]=next[next[j]]+1

next[j+1]=0

'''
kmp求next[j]数组
'''
def kmp_get_next( pattern ):
    """Build the basic KMP ``next`` (failure) table for ``pattern``.

    ``_next[ j ]`` is the pattern index to resume from after a mismatch at
    pattern index ``j``: the length of the longest proper prefix of
    ``pattern[ :j ]`` that is also a suffix of it. ``_next[ 0 ]`` is the
    sentinel -1.

    Args:
        pattern: The pattern string.

    Returns:
        A list with one entry per pattern character; an empty list for an
        empty pattern.
    """
    pattern_len = len( pattern )
    if pattern_len == 0:
        # Nothing to index; avoids writing the sentinel into an empty list.
        return []
    _next = [ 0 ] * pattern_len
    _next[ 0 ] = -1
    i = 0    # position in the pattern acting as the "text"
    j = -1   # position in the pattern acting as the "pattern" (prefix length)
    while i < pattern_len - 1:
        if j == -1 or pattern[ i ] == pattern[ j ]:
            # The matched prefix grows by one (or restarts from 0 when j was -1).
            i += 1
            j += 1
            _next[ i ] = j
        else:
            # Mismatch: fall back to the next shorter candidate prefix.
            j = _next[ j ]
    return _next

j       =   0    1    2    3    4
p       =   a    a    a    a    b
next[j] =  -1    0    1    2    3

'''
kmp求next[j]数组
'''
def kmp_get_next( pattern ):
    """Build the optimised KMP ``next`` (failure) table for ``pattern``.

    Same construction as the basic table, with one refinement: when the
    character at the fallback position equals the character that just
    mismatched, falling back there would fail the same comparison again, so
    the entry reuses that position's own fallback instead.

    Args:
        pattern: The pattern string.

    Returns:
        A list with one entry per pattern character (``_next[ 0 ]`` is -1);
        an empty list for an empty pattern.
    """
    pattern_len = len( pattern )
    if pattern_len == 0:
        # Nothing to index; avoids writing the sentinel into an empty list.
        return []
    _next = [ 0 ] * pattern_len
    _next[ 0 ] = -1
    i = 0    # position in the pattern acting as the "text"
    j = -1   # position in the pattern acting as the "pattern" (prefix length)
    while i < pattern_len - 1:
        if j == -1 or pattern[ i ] == pattern[ j ]:
            i += 1
            j += 1
            if pattern[ i ] == pattern[ j ]:
                # Same character at both positions: skip the redundant
                # comparison by inheriting j's fallback.
                _next[ i ] = _next[ j ]
            else:
                _next[ i ] = j
        else:
            # Mismatch: fall back to the next shorter candidate prefix.
            j = _next[ j ]
    return _next

'''
kmp求子串位置
'''
def kmp_index_of( pstr, pattern, pos = 0 ):
    """Find ``pattern`` in ``pstr`` using KMP matching.

    The text pointer never moves backwards. On a mismatch only the pattern
    pointer falls back, to the position given by the table from
    ``kmp_get_next``.

    Args:
        pstr: The text to search in.
        pattern: The substring to look for.
        pos: Index in ``pstr`` at which the search starts.

    Returns:
        The index in ``pstr`` where the first match found begins, or -1 if
        there is no match. An empty ``pattern`` returns ``pos``.
    """
    pattern_len = len( pattern )
    if pattern_len == 0:
        # An empty pattern matches immediately; return before building a
        # table for a pattern that has no characters.
        return pos
    _next = kmp_get_next( pattern )
    str_len = len( pstr )
    str_index = pos
    pattern_index = 0
    while str_index < str_len and pattern_index < pattern_len:
        # pattern_index == -1 is the table's sentinel: no prefix can match at
        # this text position, so advance the text and restart the pattern at 0.
        if pattern_index == -1 or pstr[ str_index ] == pattern[ pattern_index ]:
            str_index += 1
            pattern_index += 1
        else:
            pattern_index = _next[ pattern_index ]
    if pattern_index == pattern_len:
        # The whole pattern was consumed; the match starts pattern_len back.
        return str_index - pattern_index
    return -1

# Demo for the KMP search. The KMP search starts at offset 7, while str.find
# scans from the beginning, so the two printed indices can differ.
pstr = 'i am caochao, i love coding!'
pattern = 'ao'
print( kmp_index_of( pstr, pattern, 7 ), pstr.find( pattern ), sep = '\n' )

 i = 0   1   2   3   4   5   6   7   8   9  10  11  12  13
s = a   b   a   a   c   a   b   a   a   a   b   a   a   b
p = a   b   a   a   b
j = 0   1   2   3   4

s[4]不等于p[4]，令i=1，j=0

 i = 0   1   2   3   4   5   6   7   8   9  10  11  12  13
s = a   b   a   a   c   a   b   a   a   a   b   a   a   b
p =     a   b   a   a   b
j =     0   1   2   3   4

s[1]不等于p[0]，令i=2，j=0

 i = 0   1   2   3   4   5   6   7   8   9  10  11  12  13
s = a   b   a   a   c   a   b   a   a   a   b   a   a   b
p =         a   b   a   a   b
j =         0   1   2   3   4

s[3]不等于p[1]，令i=3，j=0

 i = 0   1   2   3   4   5   6   7   8   9  10  11  12  13
s = a   b   a   a   c   a   b   a   a   a   b   a   a   b
p =             a   b   a   a   b
j =             0   1   2   3   4

 i = 0   1   2   3   4   5   6   7   8   9  10  11  12  13
s = a   b   a   a   c   a   b   a   a   a   b   a   a   b
p =                                     a   b   a   a   b
j =                                     0   1   2   3   4

kmp算法：

p串next数组为：

j       =   0    1    2    3    4
p       =   a    b    a    a    b
next[j] =  -1    0    0    1    1

next数组优化过后变为：

j       =   0    1    2    3    4
p       =   a    b    a    a    b
next[j] =  -1    0   -1    1    0

 i = 0   1   2   3   4   5   6   7   8   9  10  11  12  13
s = a   b   a   a   c   a   b   a   a   a   b   a   a   b
p = a   b   a   a   b
j = 0   1   2   3   4

s[4]不等于p[4]，令j=0

 i = 0   1   2   3   4   5   6   7   8   9  10  11  12  13
s = a   b   a   a   c   a   b   a   a   a   b   a   a   b
p =                 a   b   a   a   b
j =                 0   1   2   3   4

s[4]不等于p[0]，next[0]=-1，因此i，j各自加1。i=5，j=0

 i = 0   1   2   3   4   5   6   7   8   9  10  11  12  13
s = a   b   a   a   c   a   b   a   a   a   b   a   a   b
p =                     a   b   a   a   b
j =                     0   1   2   3   4

i++，j++，直到s[9]不等于p[4]，令j=0

 i = 0   1   2   3   4   5   6   7   8   9  10  11  12  13
s = a   b   a   a   c   a   b   a   a   a   b   a   a   b
p =                                     a   b   a   a   b
j =                                     0   1   2   3   4

posted @ 2014-12-17 09:02 caochao88 阅读(...) 评论(...) 编辑 收藏