二进制

-->ASCII:只能存英文和拉丁字符。一个字符占一个字节(8位);标准ASCII只使用其中7位,共128个字符

--------->gb2312:只收录了6700多个汉字,1980年发布

---------------->gbk1.0:收录了2万多个字符,1995年发布

----------------------->gb18030:收录了27000多个汉字,2000年发布

--------->unicode:utf-32:一个字符占4个字节

--------->unicode:utf-16:常用字符(基本平面,共65536个码位)一个字符占2个字节,其余字符占4个字节

--------->unicode:utf-8:变长编码,一个英文字符用ASCII码来存(占一个字节),一个中文字符占三个字节

in python2

默认编码是ASCII

in python3

默认编码是utf-8

encode 在编码的同时,会把数据转成bytes类型

decode 在解码的同时,会把bytes类型转成字符串

b = bytes = 字节类型 = 每个元素都是[0-255]范围内的整数

 

python2.x

[root@node1 ~]# vim encode_py2.py
#!/bin/python2.7

s = "特斯拉"
print(s)

[root@node1 ~]# python2.7 encode_py2.py 
  File "encode_py2.py", line 3
SyntaxError: Non-ASCII character '\xe7' in file encode_py2.py on line 3, but no encoding declared; see http://www.python.org/peps/pep-0263.html for details

[root@node1 ~]# vim encode_py2.py
#!/bin/python2.7
# -*- coding:utf-8 -*-

s = "特斯拉"
print(s)

[root@node1 ~]# python2.7 encode_py2.py 
特斯拉

[root@node1 ~]# vim encode_py2.py
#!/bin/python2.7
# -*- coding:gbk -*-

s = "特斯拉"
print(s)

[root@node1 ~]# python2.7 encode_py2.py 
  File "encode_py2.py", line 4
SyntaxError: 'gbk' codec can't decode bytes in position 13-14: illegal multibyte sequence

[root@node1 ~]# vim encode_py2.py
#!/bin/python2.7
# -*- coding:utf-8 -*-

s = "特斯拉"
print(s)

[root@node1 ~]# python2.7 encode_py2.py 
特斯拉

[root@node1 ~]# vim encode_py2.py
#!/bin/python2.7
# -*- coding:utf-8 -*-

s = "特斯拉"
s_to_unicode = s.decode()    #python2.x默认用ascii码
print(s.decode())

[root@node1 ~]# python2.7 encode_py2.py 
Traceback (most recent call last):
  File "encode_py2.py", line 5, in <module>
    s_to_unicode = s.decode()
UnicodeDecodeError: 'ascii' codec can't decode byte 0xe7 in position 0: ordinal not in range(128)

[root@node1 ~]# vim encode_py2.py
#!/bin/python2.7
# -*- coding:utf-8 -*-

s = "特斯拉"
s_to_unicode = s.decode("utf-8")
print(s_to_unicode)

[root@node1 ~]# python2.7 encode_py2.py 
特斯拉

[root@node1 ~]# vim encode_py2.py
#!/bin/python2.7
# -*- coding:utf-8 -*-

s = "特斯拉"
s_to_unicode = s.decode("utf-8")
unicode_to_gbk = s_to_unicode.encode("gbk")
print(s_to_unicode)
print(unicode_to_gbk)

[root@node1 ~]# python2.7 encode_py2.py 
特斯拉
��˹��

[root@node1 ~]# vim encode_py2.py
#!/bin/python2.7
# -*- coding:utf-8 -*-

s = "特斯拉"
s_to_unicode = s.decode("utf-8")
unicode_to_gbk = s_to_unicode.encode("gbk")
print(s)
print(s_to_unicode)
print(unicode_to_gbk)

[root@node1 ~]# python2.7 encode_py2.py 
特斯拉
特斯拉
��˹��

[root@node1 ~]# vim encode_py2.py
#!/bin/python2.7
# -*- coding:utf-8 -*-

s = "特斯拉"
s_to_unicode = s.decode("utf-8")
unicode_to_gbk = s_to_unicode.encode("gbk")
print(s)
print("unicode:",s_to_unicode)
print("gbk:",unicode_to_gbk)

[root@node1 ~]# python2.7 encode_py2.py 
特斯拉
('unicode:', u'\u7279\u65af\u62c9')
('gbk:', '\xcc\xd8\xcb\xb9\xc0\xad')

[root@node1 ~]# vim encode_py2.py
#!/bin/python2.7
# -*- coding:utf-8 -*-

s = "特斯拉"
s_to_unicode = s.decode("utf-8")
unicode_to_gbk = s_to_unicode.encode("gbk")
print(s)
print("unicode:",s_to_unicode)
print("gbk:",unicode_to_gbk)

gbk_to_unicode = unicode_to_gbk.encode("utf-8")    #错误示范:python2.x中对str调用encode时,会先用默认的ascii隐式decode成unicode,所以报UnicodeDecodeError;应先decode("gbk")

[root@node1 ~]# python2.7 encode_py2.py 
特斯拉
('unicode:', u'\u7279\u65af\u62c9')
('gbk:', '\xcc\xd8\xcb\xb9\xc0\xad')
Traceback (most recent call last):
  File "encode_py2.py", line 11, in <module>
    gbk_to_unicode = unicode_to_gbk.encode("utf-8")
UnicodeDecodeError: 'ascii' codec can't decode byte 0xcc in position 0: ordinal not in range(128)

[root@node1 ~]# vim encode_py2.py
#!/bin/python2.7
# -*- coding:utf-8 -*-

s = "特斯拉"
s_to_unicode = s.decode("utf-8")
unicode_to_gbk = s_to_unicode.encode("gbk")
print(s)
print("unicode:",s_to_unicode)
print("gbk:",unicode_to_gbk)

gbk_to_unicode = unicode_to_gbk.decode("gbk")

unicode_to_utf8 = gbk_to_unicode.encode("utf-8")

print(gbk_to_unicode)
print(unicode_to_utf8)

[root@node1 ~]# python2.7 encode_py2.py 
特斯拉
('unicode:', u'\u7279\u65af\u62c9')
('gbk:', '\xcc\xd8\xcb\xb9\xc0\xad')
特斯拉
特斯拉

python3.x

#!/usr/bin/env python3.8
# __author: "smoke"
# date: 2020/12/13 下午9:30

s = "特斯拉"

print(s)

/home/smoke/PycharmProjects/pythonProject/venv/bin/python /home/smoke/PycharmProjects/pythonProject/lean_python/encode_py3.py
特斯拉

Process finished with exit code 0

#!/usr/bin/env python3.8
# __author: "smoke"
# date: 2020/12/13 下午9:30

import sys
print(sys.getdefaultencoding())    #查看默认编码

s = "特斯拉"

print(s)

/home/smoke/PycharmProjects/pythonProject/venv/bin/python /home/smoke/PycharmProjects/pythonProject/lean_python/encode_py3.py
utf-8
特斯拉

Process finished with exit code 0

#!/usr/bin/env python3.8
# __author: "smoke"
# date: 2020/12/13 下午9:30

s = "特斯拉"
s_to_gbk = s.encode("gbk")
print(s)
print(s_to_gbk)

/home/smoke/PycharmProjects/pythonProject/venv/bin/python /home/smoke/PycharmProjects/pythonProject/lean_python/encode_py3.py
特斯拉
b'\xcc\xd8\xcb\xb9\xc0\xad'    #b表示bytes(字节)类型,每个字节的取值在[0-255]之间

Process finished with exit code 0

#!/usr/bin/env python3.8
# __author: "smoke"
# date: 2020/12/13 下午9:30

s = "i am 特斯拉"
s_to_gbk = s.encode("gbk")
print(s)
print(s_to_gbk)

/home/smoke/PycharmProjects/pythonProject/venv/bin/python /home/smoke/PycharmProjects/pythonProject/lean_python/encode_py3.py
i am 特斯拉
b'i am \xcc\xd8\xcb\xb9\xc0\xad'    #文件传输需要用byte类型

Process finished with exit code 0

#!/usr/bin/env python3.8
# __author: "smoke"
# date: 2020/12/13 下午9:30

s = "i am 特斯拉"
s_to_gbk = s.encode("gbk")
print(s)
print(s_to_gbk.decode("gbk"))

/home/smoke/PycharmProjects/pythonProject/venv/bin/python /home/smoke/PycharmProjects/pythonProject/lean_python/encode_py3.py
i am 特斯拉
i am 特斯拉

Process finished with exit code 0