1 import chardet
2 import os
# Convert ANSI-encoded files to UTF-8
4 import codecs
5 import os
6
def strJudgeCode(str):
    """Run chardet's encoding detection on ``str`` and return the result.

    The value is handed to ``chardet.detect`` unchanged and the detection
    result is returned as-is; callers in this file read its ``'encoding'``
    entry.
    """
    detection = chardet.detect(str)
    return detection
9
def readFile(path):
    """Return the whole content of the file at ``path`` as text.

    The file is decoded with the ``'ANSI'`` codec when Python knows it
    (it is a Windows-only alias for the system code page). On platforms
    where that alias does not exist, the locale's preferred encoding is
    used instead so the function still works.

    Args:
        path: Path of the file to read.

    Returns:
        The decoded file content as a ``str``.

    Raises:
        OSError: If the file cannot be opened or read.
        UnicodeDecodeError: If the bytes are not valid in the chosen encoding.
    """
    import locale

    try:
        codecs.lookup('ANSI')
        encoding = 'ANSI'
    except LookupError:
        # 'ANSI' is not registered outside Windows; fall back to the
        # platform's own default text encoding.
        encoding = locale.getpreferredencoding(False)

    # The keyword must be spelled ``encoding``; the previous misspelling made
    # open() raise TypeError on every call.
    with open(path, 'r', encoding=encoding) as f:
        return f.read()
17
def WriteFile(str, path):
    """Write ``str`` to the file at ``path``, replacing any existing content.

    Text is written in text mode with the platform default encoding.
    Already-encoded data (``bytes`` / ``bytearray``) is written verbatim in
    binary mode, so callers can pass the result of ``some_text.encode(...)``.

    Args:
        str: The text or encoded bytes to write.
        path: Path of the file to create or overwrite.

    Raises:
        OSError: If the file cannot be opened or written.
    """
    # Bytes cannot be written to a text-mode file, so choose the mode from
    # the payload type.
    mode = 'wb' if isinstance(str, (bytes, bytearray)) else 'w'

    # ``with`` closes the handle on every path and, unlike a manual
    # try/finally, never touches an unbound name when open() itself fails.
    with open(path, mode) as f:
        f.write(str)
25
def converCode(path):
    """Re-encode the file at ``path`` from UTF-8 to GBK, in place.

    The raw bytes of the file are passed to ``strJudgeCode`` for encoding
    detection. Only when the detected encoding is UTF-8 is the content
    decoded and written back as GBK; any other file is left untouched.

    Args:
        path: Path of the file to inspect and possibly rewrite.

    Raises:
        OSError: If the file cannot be read or written.
        UnicodeError: If the content cannot be decoded as UTF-8 or contains
            characters that GBK cannot represent.
    """
    # Work on raw bytes: detection operates on undecoded data, and the
    # content must not be decoded before the encoding is known.
    with open(path, 'rb') as f:
        file_con = f.read()

    result = strJudgeCode(file_con)
    detected = result['encoding']
    # The detector may report no encoding at all (e.g. for an empty file).
    if detected is None or detected.lower() != 'utf-8':
        return

    # Encode before reopening the file so a failed conversion does not
    # truncate the original content.
    gbk_bytes = file_con.decode('utf-8').encode('gbk')
    with open(path, 'wb') as f:
        f.write(gbk_bytes)
35
def listDirFile(dir):
    """Run ``converCode`` on every regular file directly inside ``dir``.

    Each entry name and its full path are printed. Sub-directories and other
    non-file entries are printed but not converted, and they are not
    descended into.

    Args:
        dir: Directory to scan; a trailing path separator is optional.

    Raises:
        OSError: If ``dir`` cannot be listed.
    """
    for name in os.listdir(dir):
        print(name)
        # os.path.join inserts the separator only when it is missing, so the
        # path is correct with or without a trailing slash on ``dir``.
        filepath = os.path.join(dir, name)
        print(filepath)
        if not os.path.isfile(filepath):
            # Only regular files can be opened and re-encoded.
            continue
        converCode(filepath)
47
if __name__ == '__main__':
    # Alternative entry point (converts the UTF-8 files in a directory to GBK):
    # listDirFile('./TRMD/')

    # Directory containing the files to convert.
    # NOTE(review): this is a raw string, so every separator below is a
    # literal double backslash; Windows tolerates that, but confirm it is
    # intended.
    file_path =r"C:\\Users\\Lenovo\\Desktop\\数据库设计\\爬虫脚本\\TRMD\\test"

    for file in os.listdir(file_path):
        file_name = os.path.join(file_path, file)
        if not os.path.isfile(file_name):
            # Sub-directories cannot be opened as text files; skip them.
            continue

        # NOTE(review): the input is decoded as cp852 -- confirm this matches
        # the real encoding of the source files.
        with codecs.open(file_name, 'r', 'cp852') as src:
            text = src.read()

        # Rewrite the same file as UTF-8. The context managers close both
        # handles, so the output is flushed to disk before the next file.
        with codecs.open(file_name, 'w', 'utf-8') as dst:
            dst.write(text)