Python中文文件读写&参数传递

 

清洗文本中的一些冗余标点符号

# encoding=utf-8
"""Clean redundant quote punctuation in a ``ch|||en`` parallel text file.

Usage:
    python <script> INPUT [OUTPUT]

INPUT is read as UTF-8, one ``ch|||en`` record per line.  The cleaned
records are written as UTF-8 to OUTPUT (``result.txt`` when omitted).
"""
import re
import sys

DEFAULT_OUTFILE = 'result.txt'
SEPARATOR = '|||'

# Two quote characters (single or double, in any combination) separated only
# by optional whitespace.  Compiled once because it is applied to every line.
_ADJACENT_QUOTES = re.compile(r"""["']\s*["']""")


def clean_line(line):
    """Return the cleaned ``ch|||en`` record for one raw input line.

    Cleaning steps, in order:
      1. drop the trailing line terminator;
      2. replace every backtick with a single quote;
      3. collapse each pair of adjacent quotes (optionally separated by
         whitespace) into one double quote;
      4. keep only the first two ``|||``-separated fields.

    Args:
        line: One line of text, with or without its trailing newline.

    Returns:
        The cleaned record without a trailing newline, or ``None`` when the
        line contains no ``|||`` separator.
    """
    # Only the line terminator is removed so that leading/trailing spaces
    # inside the fields are left untouched.
    text = line.rstrip('\r\n')
    text = text.replace('`', "'")
    text = _ADJACENT_QUOTES.sub('"', text)

    fields = text.split(SEPARATOR)
    if len(fields) < 2:
        return None
    # Any field after the second one is dropped.
    return fields[0] + SEPARATOR + fields[1]


def main(argv=None):
    """Run the cleaner from the command line.

    Args:
        argv: Argument list in ``sys.argv`` layout (program name first).
            Defaults to ``sys.argv``.

    Returns:
        Process exit status: 0 on success, 2 when the input path is missing.
    """
    args = sys.argv if argv is None else argv
    if len(args) < 2:
        print("Usage: " + args[0] + " INPUT [OUTPUT]" if args
              else "Usage: INPUT [OUTPUT]", file=sys.stderr)
        return 2

    infile = args[1]
    outfile = args[2] if len(args) > 2 else DEFAULT_OUTFILE
    print("Reading " + infile + " now...\n")

    cleaned = []
    with open(infile, 'r', encoding='UTF-8') as src:
        for lineno, raw in enumerate(src, 1):
            record = clean_line(raw)
            if record is None:
                # Malformed line: report it and carry on with the rest.
                print("Skipping line {}: no '{}' separator".format(
                    lineno, SEPARATOR), file=sys.stderr)
                continue
            cleaned.append(record + '\n')

    with open(outfile, 'w', encoding='utf-8') as dst:
        dst.writelines(cleaned)
    return 0


if __name__ == "__main__":
    sys.exit(main())
    

 

posted @ 2017-12-11 14:31  hozhangel  阅读(644)  评论(0)    收藏  举报