二进制
-- >ASCII:只能存英文字母、数字和常用符号(共128个字符)。一个字符占一个字节(8位,实际只用低7位)
--------->gb2312:只有6700多个中文,1980
---------------->gbk1.0:存了2万多字符,1995
----------------------->gb18030:27000中文,2000
--------->unicode:utf-32:一个字符占4个字节
--------->unicode:utf-16:常用字符(BMP,共65536个码位)占2个字节,其余字符占4个字节(代理对)
--------->unicode:utf-8:一个英文用ASCII码来存(占1个字节),一个中文占三个字节
in python2
默认编码是ASCII

in py3
默认是utf-8
encode 在编码的同时,会把数据转成bytes类型
decode 在解码的同时,会把bytes类型转成字符串
b = bytes = 字节类型 = 由[0-255]范围内的整数组成的序列
python2.x
[root@node1 ~]# vim encode_py2.py
#!/bin/python2.7
s = "特斯拉"
print(s)
[root@node1 ~]# python2.7 encode_py2.py
File "encode_py2.py", line 3
SyntaxError: Non-ASCII character '\xe7' in file encode_py2.py on line 3, but no encoding declared; see http://www.python.org/peps/pep-0263.html for details
[root@node1 ~]# vim encode_py2.py
#!/bin/python2.7
# -*- coding:utf-8 -*-
s = "特斯拉"
print(s)
[root@node1 ~]# python2.7 encode_py2.py
特斯拉
[root@node1 ~]# vim encode_py2.py
#!/bin/python2.7
# -*- coding:gbk -*-
s = "特斯拉"
print(s)
[root@node1 ~]# python2.7 encode_py2.py
File "encode_py2.py", line 4
SyntaxError: 'gbk' codec can't decode bytes in position 13-14: illegal multibyte sequence
[root@node1 ~]# vim encode_py2.py
#!/bin/python2.7
# -*- coding:utf-8 -*-
s = "特斯拉"
print(s)
[root@node1 ~]# python2.7 encode_py2.py
特斯拉
[root@node1 ~]# vim encode_py2.py
#!/bin/python2.7
# -*- coding:utf-8 -*-
s = "特斯拉"
s_to_unicode = s.decode() #python2.x默认用ascii码
print(s.decode())
[root@node1 ~]# python2.7 encode_py2.py
Traceback (most recent call last):
File "encode_py2.py", line 5, in <module>
s_to_unicode = s.decode()
UnicodeDecodeError: 'ascii' codec can't decode byte 0xe7 in position 0: ordinal not in range(128)
[root@node1 ~]# vim encode_py2.py
#!/bin/python2.7
# -*- coding:utf-8 -*-
s = "特斯拉"
s_to_unicode = s.decode("utf-8")
print(s_to_unicode)
[root@node1 ~]# python2.7 encode_py2.py
特斯拉
[root@node1 ~]# vim encode_py2.py
#!/bin/python2.7
# -*- coding:utf-8 -*-
s = "特斯拉"
s_to_unicode = s.decode("utf-8")
unicode_to_gbk = s_to_unicode.encode("gbk")
print(s_to_unicode)
print(unicode_to_gbk)
[root@node1 ~]# python2.7 encode_py2.py
特斯拉
��˹�� #gbk编码的字节在utf-8终端上无法正确解码,所以显示为乱码
[root@node1 ~]# vim encode_py2.py
#!/bin/python2.7
# -*- coding:utf-8 -*-
s = "特斯拉"
s_to_unicode = s.decode("utf-8")
unicode_to_gbk = s_to_unicode.encode("gbk")
print(s)
print(s_to_unicode)
print(unicode_to_gbk)
[root@node1 ~]# python2.7 encode_py2.py
特斯拉
特斯拉
��˹��
[root@node1 ~]# vim encode_py2.py
#!/bin/python2.7
# -*- coding:utf-8 -*-
s = "特斯拉"
s_to_unicode = s.decode("utf-8")
unicode_to_gbk = s_to_unicode.encode("gbk")
print(s)
print("unicode:",s_to_unicode)
print("gbk:",unicode_to_gbk)
[root@node1 ~]# python2.7 encode_py2.py
特斯拉
('unicode:', u'\u7279\u65af\u62c9')
('gbk:', '\xcc\xd8\xcb\xb9\xc0\xad')
[root@node1 ~]# vim encode_py2.py
#!/bin/python2.7
# -*- coding:utf-8 -*-
s = "特斯拉"
s_to_unicode = s.decode("utf-8")
unicode_to_gbk = s_to_unicode.encode("gbk")
print(s)
print("unicode:",s_to_unicode)
print("gbk:",unicode_to_gbk)
gbk_to_unicode = unicode_to_gbk.encode("utf-8")
[root@node1 ~]# python2.7 encode_py2.py
特斯拉
('unicode:', u'\u7279\u65af\u62c9')
('gbk:', '\xcc\xd8\xcb\xb9\xc0\xad')
Traceback (most recent call last):
File "encode_py2.py", line 11, in <module>
gbk_to_unicode = unicode_to_gbk.encode("utf-8")
UnicodeDecodeError: 'ascii' codec can't decode byte 0xcc in position 0: ordinal not in range(128)
[root@node1 ~]# vim encode_py2.py
#!/bin/python2.7
# -*- coding:utf-8 -*-
s = "特斯拉"
s_to_unicode = s.decode("utf-8")
unicode_to_gbk = s_to_unicode.encode("gbk")
print(s)
print("unicode:",s_to_unicode)
print("gbk:",unicode_to_gbk)
gbk_to_unicode = unicode_to_gbk.decode("gbk")
unicode_to_utf8 = gbk_to_unicode.encode("utf-8")
print(gbk_to_unicode)
print(unicode_to_utf8)
[root@node1 ~]# python2.7 encode_py2.py
特斯拉
('unicode:', u'\u7279\u65af\u62c9')
('gbk:', '\xcc\xd8\xcb\xb9\xc0\xad')
特斯拉
特斯拉
python3.x
#!/usr/bin/env python3.8
# __author: "smoke"
# date: 2020/12/13 下午9:30
s = "特斯拉"
print(s)
/home/smoke/PycharmProjects/pythonProject/venv/bin/python /home/smoke/PycharmProjects/pythonProject/lean_python/encode_py3.py
特斯拉
Process finished with exit code 0
#!/usr/bin/env python3.8
# __author: "smoke"
# date: 2020/12/13 下午9:30
import sys
print(sys.getdefaultencoding()) #查看默认编码
s = "特斯拉"
print(s)
/home/smoke/PycharmProjects/pythonProject/venv/bin/python /home/smoke/PycharmProjects/pythonProject/lean_python/encode_py3.py
utf-8
特斯拉
Process finished with exit code 0
#!/usr/bin/env python3.8
# __author: "smoke"
# date: 2020/12/13 下午9:30
s = "特斯拉"
s_to_gbk = s.encode("gbk")
print(s)
print(s_to_gbk)
/home/smoke/PycharmProjects/pythonProject/venv/bin/python /home/smoke/PycharmProjects/pythonProject/lean_python/encode_py3.py
特斯拉
b'\xcc\xd8\xcb\xb9\xc0\xad' #b前缀表示bytes字节类型,每个字节的取值在[0-255]之间
Process finished with exit code 0
#!/usr/bin/env python3.8
# __author: "smoke"
# date: 2020/12/13 下午9:30
s = "i am 特斯拉"
s_to_gbk = s.encode("gbk")
print(s)
print(s_to_gbk)
/home/smoke/PycharmProjects/pythonProject/venv/bin/python /home/smoke/PycharmProjects/pythonProject/lean_python/encode_py3.py
i am 特斯拉
b'i am \xcc\xd8\xcb\xb9\xc0\xad' #文件传输需要用bytes类型
Process finished with exit code 0
#!/usr/bin/env python3.8
# __author: "smoke"
# date: 2020/12/13 下午9:30
s = "i am 特斯拉"
s_to_gbk = s.encode("gbk")
print(s)
print(s_to_gbk.decode("gbk"))
/home/smoke/PycharmProjects/pythonProject/venv/bin/python /home/smoke/PycharmProjects/pythonProject/lean_python/encode_py3.py
i am 特斯拉
i am 特斯拉
Process finished with exit code 0
浙公网安备 33010602011771号