# 爬虫练习之requests (web-scraping practice with requests)

import requests,re

# Address of the page to scrape.
url = "https://tieba.baidu.com/p/5838004691"
# Request headers: send a desktop-browser User-Agent with the page request.
headers = {
    'User-Agent':'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36'
}
# Send the request. requests applies no timeout by default, so set one to
# keep a stalled connection from hanging the script forever.
r = requests.get(url, headers=headers, timeout=10)
# Stop early on an HTTP error instead of parsing an error page.
r.raise_for_status()

# Uncomment to inspect the attributes and methods of the response object.
# print(dir(r))

# Body of the response as text.
response = r.text
# print(response)
# Pattern capturing the https "...jpg" src of an <img> tag. [^>] keeps the
# match inside a single tag and [^"] keeps the captured URL inside a single
# attribute value, so a non-matching src cannot be glued onto a later one.
rule = re.compile(r'<img [^>]*?src="(https://[^"]*?jpg)"')
# All captured image URLs, in page order.
imgs = rule.findall(response)
# print(imgs)
# Download every matched image URL and save it in the current directory.
for img in imgs:
    # Request the image; the timeout keeps one stalled download from
    # blocking the rest of the run.
    data = requests.get(img, timeout=10)
    if not data.ok:
        # Skip failed downloads rather than saving an error body as an image.
        print("skipped %s: HTTP %s" % (img, data.status_code))
        continue
    # Raw response body (binary image data).
    cont = data.content
    # File name is the last 8 characters of the URL; dropping everything up
    # to a "/" keeps a short URL from turning the name into a path.
    name = img[-8:].rsplit('/', 1)[-1]
    # Open the file in binary mode and write the image bytes.
    with open(name, 'wb') as f:
        f.write(cont)
# posted @ 2018-12-19 19:15  心做し·  阅读(146)  评论(0)    收藏  举报