Python处理utf-8 添加和删除BOM头
***************************************************************
#此脚本处理 UTF-8 和 ASCII 文件。
#用到的模块:chardet(第三方),codecs、sys、os(标准库)
**************************************************************
import chardet
import os
import codecs
import sys
def addBom(strpath, curLen, Ifadd):
    """Add or strip the UTF-8 BOM of a single file, rewriting it in place.

    The file is read as raw bytes. Depending on ``Ifadd`` the UTF-8 BOM
    (``codecs.BOM_UTF8``) is either prepended or removed, the file is
    overwritten, and one report line is printed containing the encoding
    chardet detected before and after the change. A file that is already in
    the requested state is left untouched and nothing is printed for it.

    Args:
        strpath: Path of the file to process.
        curLen: Number of leading characters of ``strpath`` to drop when
            printing the path (the caller passes the length of the scan root).
        Ifadd: Truthy to add a BOM when it is missing, falsy to remove the BOM
            when it is present.

    Returns:
        None.
    """
    # ``with`` guarantees the handle is closed even if the read raises.
    with open(strpath, 'rb') as f:
        fcontent = f.read()

    hasBom = fcontent.startswith(codecs.BOM_UTF8)

    if Ifadd and not hasBom:
        action = "AddBOM:"
    elif not Ifadd and hasBom:
        action = "RemoveBOM:"
    else:
        # Already in the requested state: skip the (costly) encoding detection.
        return

    # Detected encoding of the original bytes; may be None, hence str().
    codeType = chardet.detect(fcontent)['encoding']
    printBuffer = strpath[curLen:] + " " + str(codeType)

    if Ifadd:
        # A UTF-8 BOM in front of bytes that are not valid UTF-8 (UTF-16,
        # GBK, Latin-1, ...) would mislabel and corrupt the file, so such
        # files are reported and left unchanged. ASCII is valid UTF-8.
        try:
            fcontent.decode('utf-8')
        except UnicodeDecodeError:
            print(printBuffer + " " + "Skip: not valid UTF-8")
            return
        newcontent = codecs.BOM_UTF8 + fcontent
    else:
        newcontent = fcontent[len(codecs.BOM_UTF8):]

    newcodeType = chardet.detect(newcontent)['encoding']
    print(printBuffer + " " + action + " " + str(newcodeType))

    # Write-only binary mode is sufficient; the content is replaced entirely.
    with open(strpath, "wb") as fnew:
        fnew.write(newcontent)
    return
if __name__ == "__main__":
    # Usage: run from the directory tree to process.
    #   <script> -r        remove the UTF-8 BOM from every matching file
    #   <script> [other]   add the UTF-8 BOM (also the default with no argument)

    # Supported file extensions; add new file types here as needed.
    exts = ['.js', '.xml', '.yml', '.html', '.htm', '.jsx', '.msg', '.xlf',
            '.po', '.json', '.txt', '.pslxml', '.ts', '.tsx']

    # Guard the argv access: without any argument sys.argv[1] would raise
    # IndexError instead of falling back to "add" mode.
    removeMode = len(sys.argv) > 1 and sys.argv[1] == "-r"
    Ifadd = not removeMode

    rootDir = os.getcwd()
    # Length of the root path, used by addBom to trim the paths it prints.
    curLen = len(rootDir)

    for root, dirs, files in os.walk(rootDir):
        for file in files:
            # Only touch files whose extension is listed in exts.
            if os.path.splitext(file)[1] in exts:
                addBom(os.path.join(root, file), curLen, Ifadd)

    if removeMode:
        print("All files were removed BOM.")
    else:
        print("All files were add BOM.")