Python中文文件读写&参数传递
清洗文本中的冗余标点符号:将反引号替换为单引号,并把相邻(中间可带空白)的引号对合并为一个双引号。
#encoding=utf-8
"""Clean redundant quote punctuation in a ``source|||target`` text file.

Usage:
    python clean.py INPUT [OUTPUT]

Each input line is expected to hold two fields separated by ``|||``.
Backticks are turned into single quotes, adjacent quote pairs (optionally
separated by whitespace) are collapsed into one double quote, and the first
two fields are written back out as ``source|||target``, one record per line.
OUTPUT defaults to ``result.txt`` in the current directory.
"""
import re
import sys

DEFAULT_OUTFILE = 'result.txt'
SEPARATOR = '|||'

# Adjacent quote pairs that get collapsed into a single '"':  ""  '"  ''
# NOTE(review): the original pattern listed the '\s*" alternative twice; the
# duplicate was a no-op and has been dropped. The author may have meant the
# fourth case to be "\s*' (double quote followed by single quote), which is
# still NOT matched here -- confirm before widening the pattern.
_QUOTE_PAIR = re.compile(r'"\s*"|\'\s*["\']')


def clean_line(line):
    """Return the cleaned ``source|||target`` record for one raw input line.

    Args:
        line: A raw line as read from the input file (may end with a newline).

    Returns:
        The cleaned record terminated by exactly one ``'\\n'``, or ``None``
        when the line does not contain the ``|||`` separator (for example a
        blank line). Fields beyond the second are discarded.
    """
    # str.strip() returns a new string; the result must be kept, otherwise the
    # trailing newline survives and every record is followed by a blank line.
    text = line.strip().replace('`', '\'')
    text = _QUOTE_PAIR.sub('"', text)
    fields = text.split(SEPARATOR)
    if len(fields) < 2:
        return None
    return fields[0] + SEPARATOR + fields[1] + '\n'


def main(argv=None):
    """Run the cleaner as a command-line program.

    Args:
        argv: Argument list shaped like ``sys.argv`` (program name first).
            Defaults to ``sys.argv``.

    Returns:
        Process exit status: 0 on success, 2 when the input path is missing.
    """
    args = sys.argv if argv is None else argv
    if len(args) < 2:
        print("Usage: " + (args[0] if args else "clean.py") + " INPUT [OUTPUT]",
              file=sys.stderr)
        return 2

    in_path = args[1]
    out_path = args[2] if len(args) > 2 else DEFAULT_OUTFILE
    print("Reading " + in_path + " now...\n")

    cleaned = []
    skipped = 0
    backticks = 0
    # Collect everything before writing so that OUTPUT may safely be the same
    # path as INPUT.
    with open(in_path, 'r', encoding='utf-8') as src:
        for lineno, raw in enumerate(src, start=1):
            backticks += raw.count('`')
            record = clean_line(raw)
            if record is None:
                skipped += 1
                if raw.strip():
                    # Malformed (non-blank) lines are reported, not fatal.
                    print("Skipping line {}: no '{}' separator".format(
                        lineno, SEPARATOR), file=sys.stderr)
                continue
            cleaned.append(record)

    with open(out_path, 'w', encoding='utf-8') as dst:
        dst.writelines(cleaned)

    print("Wrote {} line(s) to {} ({} skipped, {} backtick(s) replaced)".format(
        len(cleaned), out_path, skipped, backticks))
    return 0


if __name__ == "__main__":
    sys.exit(main())
浙公网安备 33010602011771号