python之常用模块（二）

一、json与pickle模块

1、什么是序列化&反序列化

　内存中的数据类型---->序列化---->特定的格式（json格式或者pickle格式）
　内存中的数据类型<----反序列化<----特定的格式（json格式或者pickle格式）

　土办法：

{'aaa':111}--->序列化str({'aaa':111})----->"{'aaa':111}"
{'aaa':111}<---反序列化eval("{'aaa':111}")<-----"{'aaa':111}"

2、为何要序列化

序列化得到结果=>特定的格式的内容有两种用途
1、可用于存储=》用于存档
2、传输给其他平台使用=》跨平台数据交互

强调：
针对用途1的特定一格式：可是一种专用的格式--->pickle只有python可以识别
针对用途2的特定一格式：应该是一种通用、能够被所有语言识别的格式<---json

3、如何序列化与反序列化

import json

# 序列化
json_res=json.dumps([1,'aaa',True,False])
# print(json_res,type(json_res)) # "[1, "aaa", true, false]"

# 反序列化
l=json.loads(json_res)
print(l,type(l))

import json

# 序列化的结果写入文件的复杂方法
json_res=json.dumps([1,'aaa',True,False])
print(json_res,type(json_res)) # "[1, "aaa", true, false]"
with open('test.json',mode='wt',encoding='utf-8') as f:
    f.write(json_res)

# 将序列化的结果写入文件的简单方法
with open('test.json',mode='wt',encoding='utf-8') as f:
    json.dump([1,'aaa',True,False],f)


# 从文件读取json格式的字符串进行反序列化操作的复杂方法
with open('test.json',mode='rt',encoding='utf-8') as f:
    json_res=f.read()
    l=json.loads(json_res)
    print(l,type(l))

# 从文件读取json格式的字符串进行反序列化操作的简单方法
with open('test.json',mode='rt',encoding='utf-8') as f:
    l=json.load(f)
    print(l,type(l))

　　json验证: json格式兼容的是所有语言通用的数据类型，不能识别某一语言的所独有的类型

import json
json.dumps({1,2,3,4,5})  # 只有python有集合，所以序列化会报错

　　json强调：一定要搞清楚json格式，不要与python混淆

import json
l=json.loads('[1, "aaa", true, false]')
l=json.loads("[1,1.3,true,'aaa', true, false]")  # json里表达字符串的形式是双引号，否则会报错
print(l[0])

　　了解:Python3.6之后：json自动encode和decode了

l = json.loads(b'[1, "aaa", true, false]')
print(l, type(l))

with open('test.json',mode='rb') as f:  # python3.5只能是rt
    l=json.load(f)

res=json.dumps({'name':'哈哈哈'})
print(res,type(res))
res=json.loads('{"name": "\u54c8\u54c8\u54c8"}')
print(res,type(res))

4、猴子补丁

在入口处打猴子补丁

import json
import ujson

def monkey_patch_json():
    json.__name__ = 'ujson'
    json.dumps = ujson.dumps
    json.loads = ujson.loads

monkey_patch_json() # 在入口文件出运行

import ujson as json # 不行

5.pickle模块

import pickle
res=pickle.dumps({1,2,3,4,5})
print(res,type(res))

s=pickle.loads(res)
print(s,type(s))

# pickle的序列化与反序列化
f=open('序列化对象_pickle','wb')#注意是w是写入str,wb是写入bytes,j是'bytes'
f.write(j)  #-------------------等价于pickle.dump(dic,f)

f.close()
#-------------------------反序列化
import pickle
f=open('序列化对象_pickle','rb')

data=pickle.loads(f.read())#  等价于data=pickle.load(f)

print(data['age'])   

# python2与python3的pickle兼容性问题
# coding:utf-8
import pickle

with open('a.pkl',mode='wb') as f:
    # 一：在python3中执行的序列化操作如何兼容python2
    # python2不支持protocol>2，默认python3中protocol=4
    # 所以在python3中dump操作应该指定protocol=2
    pickle.dump('你好啊',f,protocol=2)

with open('a.pkl', mode='rb') as f:
    # 二：python2中反序列化才能正常使用
    res=pickle.load(f)
    print(res)

二、shelve模块

　　shelve模块比pickle模块简单，只有一个open函数，返回类似字典的对象，可读可写;key必须为字符串，而值可以是python所支持的数据类型

import shelve

f=shelve.open(r'sheve.txt')
# f['stu1_info']={'name':'egon','age':18,'hobby':['piao','smoking','drinking']}
# f['stu2_info']={'name':'gangdan','age':53}
# f['school_info']={'website':'http://www.pypy.org','city':'beijing'}

print(f['stu1_info']['hobby'])
f.close()

三、xml模块（了解）

　　xml是实现不同语言或程序之间进行数据交换的协议，跟json差不多，但json使用起来更简单，不过，古时候，在json还没诞生的黑暗年代，大家只能选择用xml呀，至今很多传统公司如金融行业的很多系统的接口还主要是xml。

　　xml的格式如下，就是通过<>节点来区别数据结构的:

<?xml version="1.0"?>
<data>
    <country name="Liechtenstein">
        <rank updated="yes">2</rank>
        <year>2008</year>
        <gdppc>141100</gdppc>
        <neighbor name="Austria" direction="E"/>
        <neighbor name="Switzerland" direction="W"/>
    </country>
    <country name="Singapore">
        <rank updated="yes">5</rank>
        <year>2011</year>
        <gdppc>59900</gdppc>
        <neighbor name="Malaysia" direction="N"/>
    </country>
    <country name="Panama">
        <rank updated="yes">69</rank>
        <year>2011</year>
        <gdppc>13600</gdppc>
        <neighbor name="Costa Rica" direction="W"/>
        <neighbor name="Colombia" direction="E"/>
    </country>
</data>

　xml协议在各个语言里的都是支持的，在python中可以用以下模块操作xml：

# print(root.iter('year')) #全文搜索
# print(root.find('country')) #在root的子节点找，只找一个
# print(root.findall('country')) #在root的子节点找，找所有

import xml.etree.ElementTree as ET

tree = ET.parse("xmltest.xml")
root = tree.getroot()
print(root.tag)

#遍历xml文档

for child in root:
    print('========>',child.tag,child.attrib,child.attrib['name'])
    for i in child:
        print(i.tag,i.attrib,i.text)

#只遍历year 节点

for node in root.iter('year'):
    print(node.tag,node.text)

#---------------------------------------

import xml.etree.ElementTree as ET

tree = ET.parse("xmltest.xml")
root = tree.getroot()

#修改

for node in root.iter('year'):
    new_year=int(node.text)+1
    node.text=str(new_year)
    node.set('updated','yes')
    node.set('version','1.0')
tree.write('test.xml')


#删除node

for country in root.findall('country'):
   rank = int(country.find('rank').text)
   if rank > 50:
     root.remove(country)

tree.write('output.xml')

#在country内添加（append）节点year2
import xml.etree.ElementTree as ET
tree = ET.parse("a.xml")
root=tree.getroot()
for country in root.findall('country'):
    for year in country.findall('year'):
        if int(year.text) > 2000:
            year2=ET.Element('year2')
            year2.text='新年'
            year2.attrib={'update':'yes'}
            country.append(year2) #往country节点下添加子节点

tree.write('a.xml.swap')

import xml.etree.ElementTree as ET
 
 
new_xml = ET.Element("namelist")
name = ET.SubElement(new_xml,"name",attrib={"enrolled":"yes"})
age = ET.SubElement(name,"age",attrib={"checked":"no"})
sex = ET.SubElement(name,"sex")
sex.text = '33'
name2 = ET.SubElement(new_xml,"name",attrib={"enrolled":"no"})
age = ET.SubElement(name2,"age")
age.text = '19'
 
et = ET.ElementTree(new_xml) #生成文档对象
et.write("test.xml", encoding="utf-8",xml_declaration=True)
 
ET.dump(new_xml) #打印生成的格式

四、configparser模块

　配置.ini文件如下：

# 注释1
; 注释2

[section1]
k1 = v1
k2:v2
user=egon
age=18
is_admin=true
salary=31
[section2]
k1 = v1

　　读取

import configparser

config=configparser.ConfigParser()
config.read('a.cfg')

#查看所有的标题
res=config.sections() #['section1', 'section2']
print(res)

#查看标题section1下所有key=value的key
options=config.options('section1')
print(options) #['k1', 'k2', 'user', 'age', 'is_admin', 'salary']

#查看标题section1下所有key=value的(key,value)格式
item_list=config.items('section1')
print(item_list) #[('k1', 'v1'), ('k2', 'v2'), ('user', 'egon'), ('age', '18'), ('is_admin', 'true'), ('salary', '31')]

#查看标题section1下user的值=>字符串格式
val=config.get('section1','user')
print(val) #egon

#查看标题section1下age的值=>整数格式
val1=config.getint('section1','age')
print(val1) #18

#查看标题section1下is_admin的值=>布尔值格式
val2=config.getboolean('section1','is_admin')
print(val2) #True

#查看标题section1下salary的值=>浮点型格式
val3=config.getfloat('section1','salary')
print(val3) #31.0

五、Hashlib模块

1、什么是哈希Hash

　hash一类算法，该算法接受传入的内容，经过运算得到一串hash值
　hash值的特点：
　　I 只要传入的内容一样，得到的hash值必然一样
　　II 不能由hash值返解成内容
　　III 不管传入的内容有多大，只要使用的hash算法不变，得到的hash值长度是一定的

2、Hash的用途

用途1：特点II用于密码密文传输与验证
用途2：特点I、III用于文件完整性校验

123456asd ==> hash字符串
123456asd ==> md5 ==> hash字符串

客户端 ====> hash字符串 ====> 服务端
                           hash字符串

3、如何用

import hashlib

m=hashlib.md5()
m.update('hello'.encode('utf-8'))  # 传入的原材料必须是Bytes类型
m.update('world'.encode('utf-8'))
res=m.hexdigest() # 拿到'helloworld'的hash校验结果
print(res)

m1=hashlib.md5('he'.encode('utf-8'))
m1.update('llo'.encode('utf-8'))
m1.update('w'.encode('utf-8'))
m1.update('orld'.encode('utf-8'))
res=m1.hexdigest()# 'helloworld'
print(res)  # 只要传入的内容一样，得到的hash值必然一样

模拟撞库

cryptograph='aee949757a2e698417463d47acac93df'
import hashlib

# 制作密码字段

passwds=[
    'alex3714',
    'alex1313',
    'alex94139413',
    'alex123456',
    '123456alex',
    'a123lex',
]

dic={}
for p in passwds:
    res=hashlib.md5(p.encode('utf-8'))
    dic[p]=res.hexdigest()

# 模拟撞库得到密码

for k,v in dic.items():
    if v == cryptograph:
        print('撞库成功，明文密码是：%s' %k)
        break


# 提升撞库的成本=>密码加盐
import hashlib

m=hashlib.md5()

m.update('天王'.encode('utf-8'))
m.update('alex3714'.encode('utf-8'))
m.update('盖地虎'.encode('utf-8'))
print(m.hexdigest())

六、subprocess模块

import  subprocess

'''
sh-3.2# ls /Users/egon/Desktop |grep txt$
mysql.txt
tt.txt
事物.txt
'''

res1=subprocess.Popen('ls /Users/jieli/Desktop',shell=True,stdout=subprocess.PIPE)
res=subprocess.Popen('grep txt$',shell=True,stdin=res1.stdout,
                 stdout=subprocess.PIPE)

print(res.stdout.read().decode('utf-8'))


#等同于上面,但是上面的优势在于,一个数据流可以和另外一个数据流交互,可以通过爬虫得到结果然后交给grep
res1=subprocess.Popen('ls /Users/jieli/Desktop |grep txt$',shell=True,stdout=subprocess.PIPE)
print(res1.stdout.read().decode('utf-8'))


#windows下:
# dir | findstr 'test*'
# dir | findstr 'txt$'
import subprocess
res1=subprocess.Popen(r'dir C:\Users\Administrator\PycharmProjects\test\函数备课',shell=True,stdout=subprocess.PIPE)
res=subprocess.Popen('findstr test*',shell=True,stdin=res1.stdout,
                 stdout=subprocess.PIPE)

print(res.stdout.read().decode('gbk')) #subprocess使用当前系统默认编码，得到结果为bytes类型，在windows下需要用gbk解码

posted @ 2020-03-31 17:55 Lance_王阅读(201) 评论(0) 收藏举报

刷新页面返回顶部

Lance_王

我只有在工作得很久而还不停歇的时候，才觉得自己的精神轻快，也觉得自己找到了活着的理由。