# convert.py

#!/bin/python
#coding=utf-8
import re,os

# URL prefix that replaceContent() removes from any href containing 'http'
# before turning the remainder into a local ".html" file name
# (presumably the origin site of the saved pages -- confirm).
filterOne='http://www.catking.com/'
def getFilesNames():
    """Return the names of regular files in the current directory containing 'asp'.

    The match is a plain substring test on the entry name, so ``x.asp``,
    ``x.aspx`` and ``x.aspx?id=1`` all qualify. Directories (and other
    non-file entries) are skipped because replaceContent() opens every
    returned name with ``open()``, which fails on a directory.

    NOTE(review): output files written by replaceContent() keep the 'asp'
    substring in their name (only punctuation is stripped), so they will
    also be returned here on a later run.
    """
    return [
        entry for entry in os.listdir('./')
        if 'asp' in entry and os.path.isfile(os.path.join('./', entry))
    ]
def _toHtmlName(name):
    """Strip '.', '?', '#', '=' and '&' from *name* and append '.html'."""
    return re.sub(r'[.?#=&]', '', name) + '.html'


def _rewriteAspLinks(line):
    """Return *line* with every href target containing 'asp' rewritten.

    Each matching URL has the ``filterOne`` prefix removed (only when the
    URL contains 'http') and is then converted with _toHtmlName(). Every
    occurrence of the URL in the line is replaced, not only the one inside
    the href attribute.
    """
    urls = [u for u in re.findall(r'href=[\'"]?([^\'" >]+)', line) if 'asp' in u]
    if not urls:
        return line

    mapping = {}
    for url in urls:
        local = url.replace(filterOne, '') if 'http' in url else url
        mapping[url] = _toHtmlName(local)

    # Replace in ONE pass, trying the longest URL first. Sequential
    # str.replace() calls would let a short URL ("a.aspx") rewrite the
    # beginning of a longer one ("a.aspx?id=1") and leave a broken link.
    alternatives = sorted(mapping, key=len, reverse=True)
    pattern = re.compile('|'.join(re.escape(u) for u in alternatives))
    return pattern.sub(lambda match: mapping[match.group(0)], line)


def replaceContent():
    """Convert every file returned by getFilesNames() into a static .html copy.

    For each input file a sibling output file is written whose name is the
    input name with '.', '?', '#', '=' and '&' removed and '.html' appended.
    Lines are copied through unchanged except that href targets containing
    'asp' are rewritten to the same flattened '.html' naming scheme, so that
    links between the converted pages keep working.

    The output file is truncated on open, so running the script again
    regenerates the page instead of appending a second copy to it. Both
    files are closed even if an error occurs part-way through.

    NOTE(review): absolute links to hosts other than ``filterOne`` are also
    flattened (their dots are removed); confirm whether external links
    should be left untouched instead.
    """
    for name in getFilesNames():
        with open(name) as source:
            with open(_toHtmlName(name), 'w') as target:
                for line in source:
                    target.write(_rewriteAspLinks(line))

# Run the conversion immediately; it operates on the current working
# directory (file names are listed from './' and opened relative to it).
replaceContent()

  

# posted @ 2013-11-02 15:38  Epirus  Views(355)  Comments(0)  Favorite  Report