Change to UTF-8

 1 import chardet
 2 import os
 3 # ANSI文件转UTF-8
 4 import codecs
 5 import os
 6 
 7 def strJudgeCode(str):
 8     return chardet.detect(str)
 9 
def readFile(path):
    """Read the text file at ``path`` and return its whole content.

    The file is decoded with the ``ANSI`` codec, which CPython registers
    (as an alias of ``mbcs``) on Windows only.  Where that codec is not
    available, ``open()``'s platform-default text encoding is used instead
    so the function still works.

    Args:
        path: Path of the file to read.

    Returns:
        The decoded file content as one string.

    Raises:
        OSError: If the file cannot be opened or read.
        UnicodeDecodeError: If the bytes are invalid in the chosen encoding.
    """
    try:
        codecs.lookup('ANSI')
    except LookupError:
        # No ANSI/mbcs codec on this platform: let open() pick its default.
        encoding = None
    else:
        encoding = 'ANSI'

    # The keyword used to be misspelled "endoding", which made open() raise
    # TypeError on every call.  The with-block closes the file even if
    # read() fails.
    with open(path, 'r', encoding=encoding) as f:
        return f.read()
17 
def WriteFile(str, path):
    """Write ``str`` to the file at ``path``, replacing existing content.

    Args:
        str: Data to write.  ``bytes`` / ``bytearray`` values are written
            unchanged in binary mode; anything else is written through a
            text-mode file using the platform-default encoding.
        path: Path of the file to create or overwrite.

    Raises:
        OSError: If the file cannot be opened or written.
    """
    # converCode passes already-encoded bytes, which a text-mode file rejects.
    mode = 'wb' if isinstance(str, (bytes, bytearray)) else 'w'

    # A with-block replaces the old try/finally, which referenced ``f`` even
    # when open() itself had failed and so masked the real error with an
    # UnboundLocalError.
    with open(path, mode) as f:
        f.write(str)
25 
def converCode(path):
    """Re-encode the file at ``path`` from UTF-8 to GBK, in place.

    The file's raw bytes are passed to ``strJudgeCode``.  The file is
    rewritten only when the detected encoding is UTF-8; any other file is
    left untouched.

    Args:
        path: Path of the file to inspect and possibly rewrite.

    Raises:
        OSError: If the file cannot be read or written.
        UnicodeError: If the content is not valid UTF-8 after all, or holds
            characters that GBK cannot represent.
    """
    # Detection and .decode() both need bytes, so the file is read in binary
    # mode here rather than as decoded text.
    with open(path, 'rb') as src:
        raw = src.read()

    detected = strJudgeCode(raw)['encoding']
    # chardet reports None when it cannot make a guess.
    if not detected or detected.lower() != 'utf-8':
        return

    # Transcode fully before reopening the file, so a failed conversion
    # never truncates the original.
    gbk_bytes = raw.decode('utf-8').encode('gbk')
    with open(path, 'wb') as dst:
        dst.write(gbk_bytes)
35 
def listDirFile(dir):
    """Run ``converCode`` on every regular file directly inside ``dir``.

    Each entry name and its joined path are printed before processing.

    Args:
        dir: Directory to scan.  A trailing path separator is optional.

    Raises:
        OSError: If ``dir`` cannot be listed.
    """
    for entry in os.listdir(dir):
        print(entry)
        # os.path.join inserts the separator when ``dir`` lacks a trailing
        # one; plain concatenation glued the two names together.
        filepath = os.path.join(dir, entry)
        print(filepath)
        # Sub-directories are not descended into; handing one to converCode
        # would fail when it tries to open the directory as a file.
        if not os.path.isfile(filepath):
            continue
        converCode(filepath)
47 
if __name__ == '__main__':
    # Re-encode every regular file directly inside ``file_path`` from cp852
    # to UTF-8, overwriting each file in place.

    # listDirFile('./TRMD/')

    # Directory containing the files to convert.
    # NOTE(review): this is a raw string, so each "\\" stays a doubled
    # backslash in the path -- presumably tolerated by Windows; confirm.
    file_path = r"C:\\Users\\Lenovo\\Desktop\\数据库设计\\爬虫脚本\\TRMD\\test"

    for file in os.listdir(file_path):
        file_name = os.path.join(file_path, file)
        # Only regular files can be opened and re-encoded.
        if not os.path.isfile(file_name):
            continue

        # Read everything and close the handle before reopening the same
        # path for writing.
        with codecs.open(file_name, 'r', 'cp852') as f:
            ff = f.read()

        # The with-block guarantees the UTF-8 output is flushed and closed.
        with codecs.open(file_name, 'w', 'utf-8') as file_object:
            file_object.write(ff)

posted on 2017-08-25 13:44 懵懂的菜鸟 阅读(185) 评论(0) 收藏 举报

刷新页面 返回顶部

懵懂的菜鸟

导航

公告

Change to UTF-8