Python 中文乱码问题(以下示例均基于 Python 2)
1.设置编码格式
import sys
# Python 2 only idiom: reload() as a builtin and sys.setdefaultencoding are
# not available in Python 3.
# NOTE(review): the reload is presumably required because setdefaultencoding
# is removed from the sys namespace at interpreter startup -- confirm against
# the Python 2 `sys` documentation before relying on this.
reload(sys)
sys.setdefaultencoding('utf8') # set the process-wide default encoding to 'utf-8'
2.字典中写入中文,并读取
a="测试"
b="200001"
dic={}
dic.update({a:b})
print dic
print json.dumps(dic, encoding='UTF-8', ensure_ascii=False)
输出:
{'\xe6\xb5\x8b\xe8\xaf\x95': '200001'}
{"测试": "200001"}
3.文件写入中文
import codecs
cc=u"你好"
aa=["测试1", "测试2"]
dic={}
if os.path.exists("zipcode"): #判断zipcode文件是否存在,存在时删除
os.remove("zipcode")
with codecs.open('zipcode','ab','utf8') as f:
dic.update({cc: aa})
print dic
dic=json.dumps(dic, encoding="UTF-8", ensure_ascii=False)
print dic
f.write(dic.encode('utf-8')) #文件中写入中文(写入字典dic)
输出:
{u'\u4f60\u597d': ['\xe6\xb5\x8b\xe8\xaf\x951', '\xe6\xb5\x8b\xe8\xaf\x952']}
{"你好": ["测试1", "测试2"]}
4.示例
get_zipcode.py:
# Imports used by this script (get_zipcode.py).
import codecs
import json
import os
import re


def get_zipcode(area):
    """Collect the digit-leading lines of one area file into ``zipcode``.

    Reads the file ``areas/<area>``, keeps every line that starts with at
    least one digit (with the trailing newline stripped), and stores the
    resulting list in the module-level ``zipcode`` dict under the key
    ``area``.

    Args:
        area: File name inside the ``areas`` directory; also used as the
            dict key.

    Returns:
        None. The result is recorded in the module-level ``zipcode`` dict,
        which must exist before this function is called.
    """
    path = os.path.join("areas", area)
    with open(path, 'r') as f:
        lines = f.readlines()
    # re.match anchors at the start of the line, so only lines whose first
    # character is a digit are kept.
    matches = [line.strip("\n") for line in lines
               if re.match(r'^\d+', line) is not None]
    zipcode.update({area: matches})
if __name__=='__main__':
zipcode = {}
if os.path.exists("zipcode"):
os.remove("zipcode")
upath = unicode("areas", 'utf-8')
print os.listdir(upath)
with codecs.open('zipcode','ab','utf8') as f:
for area in os.listdir(upath): #文件夹areas存在多个中文名文件,对文件路径进行unicode编码
print area
get_zipcode(area)
zipcode=json.dumps(zipcode, encoding='UTF-8', ensure_ascii=False)
print zipcode
f.write(zipcode.encode('utf-8'))

浙公网安备 33010602011771号