Python处理utf-8 添加和删除BOM头

***************************************************************

#此脚本处理 UTF-8 和 ASCII 文件。

#用到的模块:chardet(第三方模块),codecs、sys、os(Python 标准库)

**************************************************************

import chardet
import os
import codecs
import sys

def addBom(strpath, curLen, Ifadd):
    """Add or strip the UTF-8 BOM of a single file, rewriting it in place.

    The file is read as raw bytes. When a change is needed, one status line
    is printed: the path with its first ``curLen`` characters removed, the
    encoding chardet reports for the original bytes, the action taken, and
    the encoding chardet reports for the rewritten bytes.

    Args:
        strpath: Path of the file to process.
        curLen: Number of leading characters of ``strpath`` to drop when
            printing the path.
        Ifadd: Truthy to prepend a BOM when the file has none; falsy to
            strip the BOM when the file starts with one.

    Returns:
        None. A file that is already in the requested state is left
        untouched and nothing is printed.
    """
    with open(strpath, 'rb') as f:
        fcontent = f.read()

    hasBom = fcontent.startswith(codecs.BOM_UTF8)
    if Ifadd and not hasBom:
        action = "AddBOM:"
        newcontent = codecs.BOM_UTF8 + fcontent
    elif not Ifadd and hasBom:
        action = "RemoveBOM:"
        newcontent = fcontent[len(codecs.BOM_UTF8):]
    else:
        # Already in the requested state: skip the rewrite and the
        # encoding detection, which is only needed for the status line.
        return

    codeType = chardet.detect(fcontent)['encoding']
    newcodeType = chardet.detect(newcontent)['encoding']
    print("{} {} {} {}".format(strpath[curLen:], codeType, action, newcodeType))

    with open(strpath, 'wb') as fnew:
        fnew.write(newcontent)
    return

if __name__ == "__main__":
    # Usage: run from the directory to process; pass "-r" as the first
    # argument to remove BOMs, anything else (or nothing) to add them.

    # File extensions to process (compared case-sensitively); extend as needed.
    exts = ['.js', '.xml', '.yml', '.html', '.htm', '.jsx', '.msg', '.xlf',
            '.po', '.json', '.txt', '.pslxml', '.ts', '.tsx']

    # Guard the argv lookup so that running without arguments falls back to
    # add mode instead of raising IndexError.
    Ifadd = not (len(sys.argv) > 1 and sys.argv[1] == "-r")

    cwd = os.getcwd()
    # Length of the working-directory prefix, so addBom prints paths
    # relative to the directory being walked.
    curLen = len(cwd)
    for root, dirs, files in os.walk(cwd):
        for file in files:
            if os.path.splitext(file)[1] in exts:
                addBom(os.path.join(root, file), curLen, Ifadd)

    if Ifadd:
        print("All files were add BOM.")
    else:
        print("All files were removed BOM.")

 

posted @ 2021-05-28 15:06  懵懂的small菜鸟  阅读(852)  评论(0)    收藏  举报