#!/bin/python
#coding=utf-8
import re,os
# Site root URL; it is removed from any link containing 'http' before the
# link is flattened into a local ".html" file name.
filterOne='http://www.catking.com/'
def getFilesNames():
    """Return the names of regular files in the current directory containing 'asp'.

    Only regular files are returned: a directory whose name happens to
    contain 'asp' is skipped, because callers open() every returned name.
    The order of the result follows os.listdir() and is not sorted.
    """
    return [
        entry for entry in os.listdir('./')
        if 'asp' in entry and os.path.isfile(entry)
    ]
# Characters removed from a URL / file name when deriving its flat ".html" name.
_STRIP_CHARS = '.?#=&'
# Captures the target of an href attribute, with or without surrounding quotes.
_HREF_RE = re.compile(r'href=[\'"]?([^\'" >]+)')


def _htmlName(name):
    """Return name with '.', '?', '#', '=' and '&' removed and '.html' appended."""
    for ch in _STRIP_CHARS:
        name = name.replace(ch, '')
    return name + '.html'


def _rewriteLine(line):
    """Return line with every href target containing 'asp' replaced by its .html name."""
    urls = [url for url in _HREF_RE.findall(line) if 'asp' in url]
    if not urls:
        return line
    mapping = {}
    for url in urls:
        # Links containing 'http' lose the site root before being flattened.
        local = url.replace(filterOne, '') if 'http' in url else url
        mapping[url] = _htmlName(local)
    # Substitute all URLs in ONE pass, longest alternative first. Replacing
    # them one after another lets a short URL (e.g. "a.aspx") clobber a longer
    # one that contains it (e.g. "a.aspx?id=1"), and lets already-substituted
    # text be matched again.
    ordered = sorted(mapping, key=len, reverse=True)
    pattern = '|'.join(re.escape(url) for url in ordered)
    return re.sub(pattern, lambda match: mapping[match.group(0)], line)


def replaceContent():
    """Convert each file from getFilesNames() into a flat .html copy with rewritten links.

    For every input file, an output file named after the input (with
    '.', '?', '#', '=' and '&' removed and '.html' appended) is written in the
    current directory. Lines containing 'aspx' have their asp href targets
    replaced with the corresponding flattened .html names; all other lines
    are copied unchanged.
    """
    for name in getFilesNames():
        # NOTE: the output is opened in append mode, so an existing output
        # file is extended rather than overwritten.
        # Both handles are closed by the with-block, even if an error occurs.
        with open(name) as source, open(_htmlName(name), 'a') as target:
            for line in source:
                if 'aspx' in line:
                    line = _rewriteLine(line)
                target.write(line)
# Run the conversion over the current directory whenever this file is executed
# (this call is unconditional, so it also runs on import).
replaceContent()