爬虫技术:数据处理json和pickle模块

一:json模块

json模块的作用就是将json字符串(例如 '{"a":1,"b":1}')和python能够识别的字典进行相互转换。

import json
import pickle
# Deliberately malformed JSON: the keys are wrapped in single quotes, which
# the JSON grammar does not accept. (The stray "::" after 'a' is never even
# reached, because parsing already stops at the first single quote.)
a = """
{'a'::1,
'c':2}
"""

# json.loads rejects the text at the first single-quoted key and raises
# json.decoder.JSONDecodeError, so the print below is never executed.
ret = json.loads(a)
print(ret)

# 结果
json.decoder.JSONDecodeError: Expecting property name enclosed in double quotes: line 2 column 2 (char 2)

a不是标准的json字符串:标准的json字符串中,键名和字符串值都必须使用双引号进行包裹,这点非常重要。


# A well-formed JSON document: every key is wrapped in double quotes.
# The pieces below concatenate to the same text, including the leading and
# trailing newlines.
a = (
    "\n"
    '{"a":1,\n'
    '"c":2}\n'
)

# Parse the JSON text into a Python dict, then show the whole dict followed
# by the value stored under the key "a".
ret = json.loads(a)
print(ret, ret["a"], sep="\n")

# 结果
{'a': 1, 'c': 2}
1

结论:loads:将json字符串转换为python字典(json字符串 ----> python字典)。

import json
import pickle
# A plain Python dict; on the Python side the quoting style of the keys is
# irrelevant.
a = dict(a=1, b=2)

# Convert the Python dict into a JSON string. dumps always emits
# double-quoted keys, so the quote restrictions that apply to loads do not
# matter here.
ret = json.dumps(a)

# Show the JSON text and confirm that it is an ordinary str.
print(f"{ret}\n{type(ret)}")

{"a": 1, "b": 2}
<class 'str'>

二:pickle模块:在字节(bytes)和python对象之间进行转换

import pickle
# Four sample objects of different types to serialize.
a = {'a': 1, 'b': 2}
b = "哈哈"
c = 1
d = [1, 2, 3]

# pickle.dumps turns each object into a bytes value.
ret1, ret2, ret3, ret4 = map(pickle.dumps, (a, b, c, d))

# Print the four results in order. Sample output from the original run (the
# leading \x80\x03 shows pickle protocol 3 was used; other protocols produce
# different bytes):
# b'\x80\x03}q\x00(X\x01\x00\x00\x00aq\x01K\x01X\x01\x00\x00\x00bq\x02K\x02u.'
# b'\x80\x03X\x06\x00\x00\x00\xe5\x93\x88\xe5\x93\x88q\x00.'
# b'\x80\x03K\x01.'
# b'\x80\x03]q\x00(K\x01K\x02K\x03e.'
for pickled in (ret1, ret2, ret3, ret4):
    print(pickled)

# Persist the pickled dict. The file is opened in binary mode because ret1
# is bytes, not str.
with open("demo.txt", "wb") as f:
    f.write(ret1)



这种情况,就要看ret1的编码格式是什么。chardet是一个需要单独安装的第三方库(pip install chardet),可以用来猜测字节数据的编码格式;但要注意,pickle生成的是二进制数据而不是某种编码的文本,所以下面的检测结果只是猜测,不能据此解码:
import chardet
# chardet only guesses an encoding from the byte patterns it sees; the
# result is a dict holding the guessed encoding and a confidence score.
guess = chardet.detect(ret1)
print(guess)
# 结果
{'encoding': 'Windows-1252', 'confidence': 0.73, 'language': ''}


import pickle
import chardet
# Sample objects; only d is pickled below.
a = {'a': 1, 'b': 2}

d = [1, 2, 3]

# Unlike dumps, which returns the pickled bytes, dump writes them straight
# into the given binary file object. The with-block guarantees the file is
# closed even if pickling raises, which the manual open()/close() pair did
# not.
with open("test.txt", "wb") as f:
    pickle.dump(d, f)

import pickle
import chardet
a = {'a': 1, 'b': 2}
d = [1, 2, 3]

# Serialize the list to bytes.
# Sample output of printing ret from the original run (pickle protocol 3):
# b'\x80\x03]q\x00(K\x01K\x02K\x03e.'
ret = pickle.dumps(d)

# loads must be given binary data that pickle itself produced with dumps;
# it rebuilds the original object from those bytes.
ret1 = pickle.loads(ret)

# Show the pickled bytes first, then the restored list.
print(ret, ret1, sep="\n")

# [1, 2, 3]
import pickle
import chardet
a = "1"

# Plain UTF-8 encoded text, i.e. bytes that did NOT come from pickle.dumps.
ret = a.encode("utf-8")
print(ret)

# These bytes are not a valid pickle stream, so the call below raises
# _pickle.UnpicklingError and the final print never runs.
ret1 = pickle.loads(ret)  # loads only accepts data that pickle's own dumps produced
print(ret1)

# 结果
_pickle.UnpicklingError: could not find MARK
import pickle
import chardet
a = "1"

# Same experiment, this time encoding the text with Windows-1252, the codec
# chardet guessed for the pickled bytes earlier.
ret = a.encode("Windows-1252")
print(ret)

# Choosing a different text encoding does not help: the bytes still were not
# produced by pickle, so loads raises _pickle.UnpicklingError again and the
# final print never runs.
ret1 = pickle.loads(ret)
print(ret1)

# 结果
_pickle.UnpicklingError: could not find MARK

load方法

import pickle
import chardet

# Read the pickled object back from demo.txt. load takes a binary file
# object directly, so the file must be opened in "rb" mode.
with open("demo.txt", mode="rb") as pickled_file:
    ret = pickle.load(pickled_file)

print(ret)

# 结果
{'a': 1, 'b': 2}

 # 如何将多个字典存入txt文件中,并且将文件的内容转换成excel形式呢?

import json
# Several independent records that should end up in one file.
item1 = {"a": 1}
item2 = {"b": 1}
item3 = {"c": 1}
item4 = {"d": 1}

# Wrap the records in a single top-level object so the whole file is one
# valid JSON document.
ret = {"data": [item1, item2, item3, item4]}

# Saving: serialize to a JSON string and write it out as text.
# The write step must actually run; otherwise the load step below would try
# to parse whatever demo.txt happened to contain before.
# NOTE: this overwrites any existing demo.txt.
demo = json.dumps(ret)
print(type(demo))
with open("demo.txt", "w", encoding="utf-8") as f:
    f.write(demo)

# Loading: read the text back, then parse it into Python objects again.
with open("demo.txt", "r", encoding="utf-8") as f:
    data = f.read()
data1 = json.loads(data)

print(data1)

print(data1["data"])

info_list = data1["data"]

# Every element of the list comes back as a plain dict.
for i in info_list:
    print(type(i))

 

posted @ 2019-09-24 14:07  张京墨  阅读(472)  评论(0)  编辑  收藏  举报