requests的使用
安装:pip install requests
requests的基本功能:
- 基本请求(GET/POST)。
- 会话(Cookies/超时配置/会话对象) 。
- 代理及SSL验证。
基本请求(GET/POST)
基本Get请求
requests.get(url, params=None, **kwargs)
基本POST请求
requests.post(url, data=None, json=None, **kwargs)
参数:
- headers(请求头信息)
- cookies(cookie包)
- files(文件)
- auth(HTTP身份验证)
- timeout(超时设置)
- allow_redirects(是否启用重定向)
- proxies(协议代理)
- verify(SSL证书验证)
- stream(流)
- cert(ssl证书文件路径)
get请求
import requests

# Send a plain GET request.
responses = requests.get('http://www.baidu.com')

# Other attributes worth inspecting:
# print(type(responses))
# print(responses.status_code)  # HTTP status code, 200 means success
# print(responses.cookies)
# print(responses.text)  # page source decoded with responses.encoding
print(responses.encoding)  # encoding requests derived from the response headers

# Decode the raw bytes ourselves to avoid garbled Chinese text. The charset is
# detected from the body instead of being hard-coded to 'gbk': a fixed codec
# raises UnicodeDecodeError or yields mojibake when the page uses another one.
# Known alternatives if detection is wrong: 'utf-8' or 'gbk'.
charset = responses.apparent_encoding or 'utf-8'
print(responses.content.decode(charset, errors='replace'))
传递参数
import requests

# Option 1: let requests build the query string from a dict.
# params = {'name':'joe','age':'18'}
# responses = requests.get('http://httpbin.org/get',params=params)
# print(responses.text)

# Option 2: write the query string directly into the URL.
url = "http://httpbin.org/get?name=joe&age=18"
responses = requests.get(url)
print(responses.text)

# Request flow: client ----> server
# Example User-Agent header sent by a browser:
#   Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36
#   (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36
# Request methods:
#   POST - parameters travel in the request body, so they do not show up in
#          the URL (the body is only encrypted when HTTPS is used)
#   GET  - parameters are part of the URL and therefore visible
伪装浏览器
import requests
from faker import Factory  # used to generate a random User-Agent string

# Build a header set that makes the request look like it comes from a browser.
fake = Factory().create('Zh_cn')
user_agent = fake.user_agent()
headers = dict([('User-Agent', user_agent)])

# Alternative targets for checking which headers are actually sent:
# responses = requests.get('http://httpbin.org/get',headers = headers)
# responses = requests.get('http://www.baidu.com',headers = headers)
# print(responses.text)

# Visit Zhihu with the spoofed User-Agent.
target = 'https://www.zhihu.com'
responses = requests.get(target, headers=headers)
print(responses.status_code)
print(responses.text)
解析json数据
# Parse JSON data returned by a server.
import json

import requests
from faker import Factory  # used to generate a random User-Agent string

fake = Factory().create('Zh_cn')
user_agent = fake.user_agent()
headers = {'User-Agent': user_agent}
responses = requests.get('https://github.com/timeline.json', headers=headers)

# Method 1: let requests decode the body.
# print(responses.text)    # raw JSON string
# print(responses.json())  # JSON string parsed into Python objects

# Method 2: decode the body explicitly with the json module.
print(responses.status_code)
print(responses.text)  # raw JSON string
# json.loads replaces the former eval(): eval executes whatever the server
# sends as Python code and also fails on the JSON literals true/false/null.
parsed = json.loads(responses.text)
print(parsed)
print(type(parsed))
如何优雅的查看json
json文件出现中文乱码:json.dumps(..., ensure_ascii=False);json、字典格式化输出:indent=2;json格式化打印:使用pprint(Python标准库自带,无需pip安装)。
获取二进制文件
import json

import requests
from faker import Factory  # used to generate a random User-Agent string

# --- Download a music file ---
fake = Factory().create('Zh_cn')
user_agent = fake.user_agent()
headers = {'User-Agent': user_agent}
url = 'http://isure.stream.qqmusic.qq.com/C400003USWw32Y4oGN.m4a?guid=4447747836&vkey=89A9B8EF026585BE94E19479605C8EC7F3EBFAAF4314FB4286027832209952ED85CC5DEEC80CB4CA89CD15FDD079F35CBB94529F7DB97142&uin=131&fromtag=66'

# One-shot alternative: read the whole binary body in a single call.
# response = requests.get(url,headers=headers,stream = True).raw.read()
# with open('余情未了.mp3','wb') as f:
#     f.write(response)

# Stream the body so a large file never has to fit in memory at once. Using
# the response as a context manager releases the connection when done.
with requests.get(url, headers=headers, stream=True) as response:
    with open('aaa.mp3', 'wb') as file:
        # chunk_size is given in bytes: 1024 * 1024 bytes = 1 MiB per chunk.
        for chunk in response.iter_content(1024 * 1024):
            file.write(chunk)

# --- Download an image ---
url = 'https://timgsa.baidu.com/timg?image&quality=80&size=b9999_10000&sec=1533978567743&di=32bef64350887b66c216c4f23b66e067&imgtype=0&src=http%3A%2F%2Fimg5q.duitang.com%2Fuploads%2Fitem%2F201502%2F09%2F20150209110256_BHyy3.jpeg'
# .content is used instead of .raw.read(): it undoes any gzip/deflate transfer
# encoding and returns the connection to the pool once the body is read.
response = requests.get(url).content
with open('aaa.jpg', 'wb') as file:
    file.write(response)
模拟登陆发送cookie
import json

import requests
from faker import Factory  # used to generate a random User-Agent string

fake = Factory().create('Zh_cn')
user_agent = fake.user_agent()

# Option 1: pass a dict through the `cookies` keyword argument.
# cookies = {'name':'joe','age':'18'}
# responses = requests.get('http://httpbin.org/get',cookies = cookies)
# print(responses.text)

# Option 2: send the cookies as a raw request header. The HTTP header name is
# 'Cookie'; a header called 'cookies' is just an unknown header and the server
# will not treat its value as cookies.
headers = {
    'User-Agent': user_agent,
    'Cookie': "name=joe; age=18",
}
responses = requests.get('http://httpbin.org/get', headers=headers)
print(responses.text)
def __init__(self):
    """Create the empty cookie jar that is shared by all later requests."""
    jar = requests.cookies.RequestsCookieJar()
    self.cookies = jar

def go(self, url, method, post_data):
    """Send one request with the stored cookies and remember the new ones.

    Args:
        url: address to request.
        method: HTTP method name handed to ``requests.request``.
        post_data: payload passed as the ``data`` argument.
    """
    # NOTE(review): `info` is not defined in this fragment; presumably an
    # object defined elsewhere that carries the common headers - confirm.
    request_kwargs = dict(
        data=post_data,
        headers=info.headers,
        cookies=self.cookies,  # send the cookies collected so far
    )
    response = requests.request(method, url, **request_kwargs)
    # Merge the cookies set by this response into the jar.
    self.cookies.update(response.cookies)
利用POST发送用户名、密码及验证码。这里的验证码没有进行识别,只好先读取验证码图片,手动输入。
def LoginByPost():
    """Log in by POSTing user name, password and a manually typed captcha.

    The captcha image is downloaded and shown, the code is read from standard
    input, the login form is submitted, and finally the text of the admin page
    is printed. Nothing is returned.
    """
    from PIL import Image
    from io import BytesIO

    imgUrl = 'http://***/authcode.php'
    loginUrl = 'http://***/admin_loginCheck.php'
    url = 'http://***/***/admin_honor.php'

    # One session is used for all three requests so the cookies set while
    # fetching the captcha are sent with the login and the page request.
    # The `with` block closes the session's connections afterwards.
    with requests.session() as s:
        # Fetch the captcha image and display it to the user.
        res = s.get(imgUrl)
        im = Image.open(BytesIO(res.content))
        im.show()
        # The captcha is not recognised automatically; the user types it in.
        code = input()
        postData = {'pname': 'admin', 'password': '***', 'validateCode': code}
        # Submit the login form.
        s.post(loginUrl, postData)
        # Open the page that requires a logged-in session.
        res = s.get(url)
        res.encoding = 'utf-8'
        print(res.text)
利用Cookies直接登录。无需用户名、密码及验证码。此时,需要先获得登录该网站后的Cookies,一种方法是通过浏览器查看Cookies,另一种方法是利用上面的requests.session获取登录后的Cookies。我们采用第二种方式。
# (1) Obtain the cookies through requests.session.
import requests


def GetCookie():
    """Log in with a manually typed captcha and report the session cookies.

    Returns:
        dict: cookie names mapped to values as held by the session after the
        login request (this includes the placeholder cookie added below).
    """
    from PIL import Image
    from io import BytesIO

    imgUrl = 'http://***/authcode.php'
    loginUrl = 'http://***/admin_loginCheck.php'

    # The `with` block closes the session's connections when finished.
    with requests.session() as s:
        print(s.cookies.get_dict())  # printed first; normally still empty here
        res = s.get(imgUrl)
        im = Image.open(BytesIO(res.content))
        im.show()
        code = input()
        postData = {'pname': 'admin', 'password': '***', 'validateCode': code}
        s.post(loginUrl, postData)
        # Shows how to merge extra cookies through a RequestsCookieJar.
        # NOTE(review): 'cookie-name'/'cookie-value' looks like a placeholder
        # that ends up next to the real login cookies - confirm it is wanted.
        c = requests.cookies.RequestsCookieJar()
        c.set('cookie-name', 'cookie-value')
        s.cookies.update(c)
        cookies = s.cookies.get_dict()
    print(cookies)
    return cookies


# (2) Log in directly with the cookies obtained above.
def DirLogin():
    """Request the admin page directly, sending a previously captured cookie.

    Prints the page text; nothing is returned.
    """
    url = 'http://***/***/admin_honor.php'
    headers = {
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
        'Accept-Encoding': 'gzip, deflate',
        'Accept-Language': 'zh-CN,zh;q=0.8,zh-TW;q=0.7,zh-HK;q=0.5,en-US;q=0.3,en;q=0.2',
        'Cache-Control': 'max-age=0',
        'Connection': 'keep-alive',
        'Host': '***',
        'Referer': 'http://***/***/admin_index.php',
    }
    # This value is the cookie captured by GetCookie() in an earlier run.
    cookies = {'PHPSESSID': 'cnguud4r1hmn3passs906odp21'}
    with requests.session() as s:
        rs = s.get(url, headers=headers, cookies=cookies, verify=False)
    rs.encoding = 'utf-8'
    print(rs.text)


# Note: logging in with saved cookies works at some times and fails at others.
# The site identifies the client through a server-side session, and such
# sessions usually have an expiry time. Once the server has deleted the
# session, GetCookie() has to be run again to obtain fresh cookies.
post请求
传递参数
import requests
import json

# POST request
# Request methods:
#   POST - parameters travel in the request body, so they do not show up in
#          the URL (the body is only encrypted when HTTPS is used)
#   GET  - parameters are part of the URL and therefore visible
data = dict(user='joe', pwd='123456')
reply = requests.post('http://httpbin.org/post', data=data)
responses = reply.text
# GET equivalent for comparison:
# responses = requests.get('http://httpbin.org/get',params = data).text
print(responses)
上传文件
import requests

# Open the file inside a `with` block so the handle is closed once the upload
# has finished, instead of staying open for the rest of the process.
with open('filles.txt', 'rb') as upload:
    files = {'file': upload}
    response = requests.post('http://httpbin.org/post', files=files).text
print(response)
发送json数据
import json

import requests

data = {'user': 'joe', 'pwd': '123456'}
print(json.dumps(data))
print(type(json.dumps(data)))  # json.dumps turns the dict into a JSON string

# The `json=` parameter serializes the dict and also sets the
# 'Content-Type: application/json' header. Passing data=json.dumps(data)
# sends the same bytes but without telling the server that they are JSON.
responses = requests.post('http://httpbin.org/post', json=data).text
print(responses)
session会话
cookie和session区别:
- cookie数据是服务器分发存放在客户的浏览器上的数据,session数据放在服务器上
- cookie不是很安全,别人可以分析存放在本地的cookie并进行cookie欺骗
- session会在一定时间内保存在服务器上。当访问增多,会比较占用你服务器的性能
- 单个cookie保存的数据不能超过4K,很多浏览器都限制一个站点最多保存20个cookie
import requests
from faker import Factory  # used to generate a random User-Agent string

fake = Factory().create('Zh_cn')
user_agent = fake.user_agent()
# The HTTP header that carries cookies is named 'Cookie'.
headers = {
    'User-Agent': user_agent,
    'Cookie': "name=joe; age=18",
}

# --- Linked requests: the second one builds on the first ---
s = requests.session()  # open a session
responses = s.get('http://httpbin.org/get', headers=headers)
print(responses.text)
# Store the headers on the session so they are sent with every later request
# made through it, merged with any per-request headers.
s.headers.update(headers)
responses = s.get('http://httpbin.org/get', headers={'aaa': 'bbb'})
print(responses.text)

# --- Completely independent requests ---
# These go through requests.get rather than the session `s`, so nothing
# stored on the session (headers, cookies) is carried over between them.
responses = requests.get('http://httpbin.org/get', headers=headers)
print(responses.text)
responses = requests.get('http://httpbin.org/get', headers={'aaa': 'bbb'})
print(responses.text)
添加代理
import requests
from faker import Factory  # used to generate a random User-Agent string

fake = Factory().create('Zh_cn')
user_agent = fake.user_agent()
# Optional spoofed header set (used by the commented-out request below):
# headers = {
#     'User-Agent':user_agent
# }

# Map each target URL scheme to the proxy that should carry it.
# NOTE(review): an 'https://' proxy URL may make the client speak TLS to the
# proxy itself; plain HTTP proxies usually need 'http://' here - confirm.
proxies = dict([
    ("http", "http://219.141.153.41:80"),
    ("https", "https://125.120.8.172:6666"),
])

target = "http://httpbin.org/get"
response = requests.get(target, proxies=proxies)
# response = requests.get("http://www.baidu.com/", proxies = proxies,headers = headers)
print(response.text)

# HTTPS target for comparison:
# response = requests.get("https://www.taobao.com", proxies=proxies)
# print(response.status_code)
# print(response.text)
requests的几个小技巧
# The helper functions live in `requests.utils`.

# Convert a dict into a cookie jar object.
cookie_jar = requests.utils.cookiejar_from_dict({'name': 'joe'})

# Convert a cookie jar object into a dict.
cookie_dict = requests.utils.dict_from_cookiejar(cookie_jar)

# URL-encode a string.
encoded = requests.utils.quote('name=joe&city=杭州')

# URL-decode a string.
decoded = requests.utils.unquote(encoded)

# Skipping SSL certificate verification (verify=False) still emits
#   InsecureRequestWarning: Unverified HTTPS request is being made. Adding
#   certificate verification is strongly advised. See:
#   https://urllib3.readthedocs.io/en/latest/advanced-usage.html#ssl-warnings
# Disabling the warning only helps when done BEFORE the request is sent.
requests.packages.urllib3.disable_warnings()

# Ignore SSL certificate verification. The stray trailing space that used to
# sit inside the URL string has been removed; it became part of the path.
response = requests.get("https://www.12306.cn/mormhweb/", verify=False)

浙公网安备 33010602011771号