Python3中的Requests库
Requests库——基于urllib3的,简单易用的HTTP库
一,基本GET请求
1.实例引入
1 import requests 2 3 response = requests.get("https://www.baidu.com") #获取百度网页信息 4 print(type(response)) 5 print(response.status_code) #输出状态码 200 6 print(type(response.text)) 7 print(response.text) #输出内容 8 print(response.cookies) #输出cookies内容
2.带参数的GET请求
1 import requests 2 3 data = { 4 'name': 'jack', 5 'age': 20 6 } 7 response = requests.get("http://httpbin.org/get", params=data) #带参数的get请求 8 ''' 9 也可写为: 10 response = requests.get("http://httpbin.org/get?name=jack&age=20") 11 ''' 12 print(response.text)
3.解析json
1 import requests 2 3 response = requests.get("http://httpbin.org/get") 4 print(type(response.text)) 5 print(response.json()) 6 ''' 7 相当于: 8 import json 9 print(json.loads(response.text)) 10 ''' 11 print(type(response.json()))
输出结果为:
<class 'str'> {'args': {}, 'headers': {'Accept': '*/*', 'Accept-Encoding': 'gzip, deflate', 'Host': 'httpbin.org', 'User-Agent': 'python-requests/2.22.0', 'X-Amzn-Trace-Id': 'Root=1-5e467444-697bbfa202d8d6e88ccdea42'}, 'origin': '223.88.90.83', 'url': 'http://httpbin.org/get'} <class 'dict'>
4.获取二进制数据
1 import requests 2 3 #爬取百度图片 4 response = requests.get("https://dss0.bdstatic.com/5aV1bjqh_Q23odCf/static/superman/img/logo_top-e3b63a0b1b.png") 5 print(type(response.text),type(response.content)) #输出为<class 'str'> <class 'bytes'> 6 print(response.text) # 输出为乱码 7 print(response.content) #二进制内容
保存二进制文件:
1 import requests 2 3 #获取百度图片 4 response = requests.get("https://dss0.bdstatic.com/5aV1bjqh_Q23odCf/static/superman/img/logo_top-e3b63a0b1b.png") 5 with open('baidu.png', 'wb') as f: 6 f.write(response.content) #将二进制内容写入文件 7 f.close()
5.添加headers
1 import requests 2 3 headers = { 4 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36' 5 } #设置headers 6 response = requests.get('https://www.zhihu.com/explore', headers=headers) 7 print(response.text)
二,基本POST请求
1 import requests 2 3 data = {'name': 'jack', 'age': '20'} #添加form data部分数据 4 response = requests.post('http://httpbin.org/post', data=data) #post请求 5 print(response.text)
1 #添加headers信息 2 import requests 3 4 data = {'name': 'jack', 'age': '20'} 5 headers = { 6 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36' 7 } #headers信息 8 response = requests.post('http://httpbin.org/post', data=data, headers=headers) 9 print(response.json()) #输出为json格式
三,响应
1.response属性
1 import requests 2 3 response = requests.get('http://www.jianshu.com') 4 print(type(response.status_code), response.status_code) #状态码 5 print(type(response.headers), response.headers) #headers信息 6 print(type(response.cookies), response.cookies) #cookies信息 7 print(type(response.url), response.url) #访问的url 8 print(type(response.history), response.history) #访问的历史记录
2.状态码判断
100:{'continue',},
200:{'ok', 'okay', 'all_ok', 'all_okay'},
201:{'created',},
#重定向
300:{'multiple_choices',},
301:{'moved_permanently', 'moved'},
#客户端错误
401:{'unauthorized',},
403:{'forbidden',},
404:{'not_found',},
405:{'method_not_allowed',}
1 import requests 2 3 response = requests.get('http://www.jianshu.com') 4 exit() if not response.status_code==requests.codes.ok else print('Request Successfully') #可用requests.codes.ok代替状态码200
5 ''' 6 也可写为: 7 exit() if not response.status_code==200 else print('Request Successfully') 8 '''
四,高级操作
1.文件上传
1 import requests 2 3 files = {'file': open('baidu.png', 'rb')} #打开文件 4 response = requests.post('http://httpbin.org/post', files=files) #通过post将文件上传
2.获取cookies
1 import requests 2 3 response = requests.get('https://www.baidu.com') 4 print(response.cookies) #不需要再声明handler 5 for key, value in response.cookies.items(): #输出cookies各项信息 6 print(key + '=' + value)
3.会话维持
模拟登录,维持登录状态
1 import requests 2 3 s = requests.Session() #声明Session对象,用Session对象发起两次get请求,相当于在一个浏览器访问 4 #若不声明Session对象,则相当于在两个浏览器中操作,设置和访问分别发生在两个独立的会话中,无法维持会话 5 s.get('http://httpbin.org/cookies/set/number/123456789') #访问网站设置cookies 6 response = s.get('http://httpbin.org/cookies') #访问cookies 7 print(response.text) #若没有声明Session对象,得到的cookies为空
4.证书验证
1 #爬取证书不合法,即不安全的网站 2 import requests 3 from requests import urllib3 4 urllib3.disable_warnings() #消除不进行证书验证的警告信息 5 response = requests.get('https://www.12306.cn', verify=False) #设置verify=False,不进行证书验证 6 print(response.status_code)

浙公网安备 33010602011771号