# Amount subtracted from a candidate span's length before it is compared with
# len(tableText). With the value 0 a span matches only when it is exactly as
# long as the replacement text.
tempValue = 0


def _iterPositions(text, subText, start):
    """Yield each index at or after ``start`` where ``subText`` occurs in ``text``.

    Overlapping occurrences are included because the search resumes one
    character after the previous hit.
    """
    position = text.find(subText, start)
    while position != -1:
        yield position
        position = text.find(subText, position + 1)


def replaceTextByTable(pageText, headerText, footerText, tableText, preFooterPosition=0):
    """Replace the first header...footer span whose length matches ``tableText``.

    Headers are searched from ``preFooterPosition`` onwards, in order. For each
    header, the footers that start after it are tried in order. A span runs
    from the start of the header to the end of the footer and matches when
    ``abs(spanLength - tempValue) == len(tableText)``.

    Args:
        pageText: Text to search and modify.
        headerText: Text that opens the span to replace.
        footerText: Text that closes the span to replace.
        tableText: Replacement text; its length selects the span.
        preFooterPosition: Index in ``pageText`` at which the header search
            starts, typically the index returned by the previous call.

    Returns:
        A ``(newText, nextPosition)`` tuple. On a match, ``newText`` is
        ``pageText`` with the span replaced and ``nextPosition`` is the index
        just past the span's footer in ``pageText``. When no span matches,
        ``(pageText, preFooterPosition)`` is returned unchanged and nothing is
        printed.
    """
    footerLength = len(footerText)
    tableLength = len(tableText)

    for headerPosition in _iterPositions(pageText, headerText, preFooterPosition):
        for footerPosition in _iterPositions(pageText, footerText, headerPosition + 1):
            spanEnd = footerPosition + footerLength
            adjustedLength = spanEnd - headerPosition - tempValue
            if adjustedLength > tableLength:
                # The span only grows with later footers, so this header
                # cannot produce a match any more.
                break
            if abs(adjustedLength) != tableLength:
                continue

            oldStr = pageText[headerPosition:spanEnd]
            # Splice at the located indices; str.replace would instead hit the
            # first textual occurrence, which may lie before headerPosition.
            newStr = pageText[:headerPosition] + tableText + pageText[spanEnd:]
            print("-------------------")
            print("ori str:" + pageText)
            print("old str:" + oldStr)
            print("new str:" + newStr)
            print("-------------------")
            return newStr, spanEnd

    # No header/footer pair has the required length: leave the text untouched
    # instead of searching forever.
    return pageText, preFooterPosition
# # 表格前面字符串在文档中出现的下标数组
# headerPositions = getPositions(pageText, headerText)
# # 表格最后面的字符串在文档中出现的下标数据
# footerPositions = getPositions(pageText, footerText)
#
# # 表格的字符串长度
# tableLength = len(tableText)
# # gapLength为前后两个下标中间的字符串长度与tableText的长度差,长度差越小说明该前后下标所在的字符串就是需要替换的字符串
# minLength = tableLength
# # 记录前后两个下标位置
# gapHeaderPosition = -1
# gapFooterPosition = -1
#
# # 前后下标两两计算长度,与tableText的长度进行比较
# for pHeader in headerPositions:
# for pFooter in footerPositions:
# indexDistance = abs(int(pHeader) - int(pFooter))
# # 此处要更新前后两个下标位置,条件是前后两个下标
# gap = abs(indexDistance - tableLength)
# if gap < minLength:
# minLength = gap
# gapHeaderPosition = pHeader
# gapFooterPosition = pFooter
#
# # 如果找到了合适的前后两个下标,则进行字符串替换操作
# if gapHeaderPosition != -1 and gapFooterPosition != -1:
# oldStr = pageText[gapHeaderPosition: gapFooterPosition + len(footerText)]
# newStr = pageText.replace(oldStr, tableText)
# print("oldStr:" + oldStr)
# print("originStr:" + pageText)
# print(" newStr:" + newStr)
# return newStr
# else:
# return pageText
# # print(headerPositions)
# # print(footerPositions)
#
#
# def getPositions(pageText, subText):
# positions = []
# start = 0
# while True:
# # 查找指定字符串在文档中首次出现的下标
# position = pageText.find(subText, start)
# # 如果返回值为-1,则代表没有找到,则循环结束
# if position == -1:
# break
# # 如果找到后将下标保存到数组
# positions.append(position)
# # 重新计算起始查找的下标,+1是从下标后面一个字符串开始查找,否则查询出来会一直是0
# start = position + 1
# return positions
# Demo: replace the three "word + you" spans one call at a time. Each call
# resumes its search from the position returned by the previous call.
originStr = "hello word + you. Do you want word + you have word + you?"
newStr1, preFooterPosition1 = replaceTextByTable(
    originStr, "word", "you", "XXXXXXXXXX")
newStr2, preFooterPosition2 = replaceTextByTable(
    newStr1, "word", "you", "XXXXXXXXXX",
    preFooterPosition=preFooterPosition1)
newStr3, preFooterPosition3 = replaceTextByTable(
    newStr2, "word", "you", "XXXXXXXXXX",
    preFooterPosition=preFooterPosition2)
# replaceTextByTable("hello word, i have a word give you. Do you want have?", "word", "you",
# "XXXXXXXXXXXXXXXXXXXXX") # 2:4
# replaceTextByTable("hello word, i have a word give you. Do you want have?", "word", "you",
# "XXXXXXXXXXXXXXXXXXXXXXXXXXXX") # 1:3
# replaceTextByTable("hello word, i have a word give you. Do you want have?", "word", "you",
# "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX") # 1:4