Python模块 | "requests"
"requests"模块,支持HTTP连接保持和连接池,自动实现持久连接"keep-alive"。支持使用cookie保持会话、文件上传等功能。自动响应内容编码,国际化URL和POST数据自动编码。使用"requests"模块可以轻而易举地完成浏览器可有的任何操作。但是"requests"模块发送请求,得到网页后,并不会执行"JS"代码,需要自己分析目标站点后,再发起新的"requests"请求。
requests.get("https://github.com/timeline.json") # GET请求
requests.post("https://www.baidu.com/post") # POST请求
requests.put("https://www.baidu.com/put") # PUT请求
requests.delete("https://www.baidu.com/delete") # DELETE请求
requests.head("https://www.baidu.com/get") # HEAD请求
requests.options("https://www.baidu.com/get") # OPTIONS请求
requests.patch("https://www.baidu.com") # PATCH请求
无论哪种请求方式,他们的使用方法都是固定的,并且在发送基本的HTTP请求同时,还可以附带多种方法传送数据,来相互组合使用
requests.method(url[,parameters])
"params","headers","cookies","data","session","json","files","auth","timeout","proxies","allow_redirects","stream","verify","cert"
requests.post(url,cookies={"Session":"dfsjkalnfsa=="})
"params"方法会将接收的字典默认自动编码,并发送到url中
r = requests.get("https://www.baidu.com",params={"id":"1"}) #https://www.baidu.com?id=1
url = "https://www.baidu.com"
params = {"id": "1", "value": "action"}
r = requests.post(url, params=params)
#https://www.baidu.com?id=1&value=action
"headers"方法用来发送http请求头
r = requests.post( url = "https://www.baidu.com", headers = {"Content-Type":"application/x-www-form-urlencoded"} )
"cookies"方法用来发送cookie,支持CookieJar,也可以和"http.cookiejar"模块配合使用
cookie = {"Cookie":"fsaujfsdijn=="}
r = requests.post("https://www.baidu.com",cookies=cookie)
jar = requests.cookies.RequestsCookieJar()
jar.set("cookie_baidu","adsfjanjalj==",domain="www.baidu.com",path="/index")
jar.set("cookie_pornhub","fdafgbdaldj==",domain="www.pornhub.com",path="/index")
r = requests.get("https://www.baidu.com",cookies=jar)
"data"方法用来发送post请求的实体主体,可以接收字典和文件对象,同时支持一键多值,并可以和"json"模块配合
url = "https://www.baidu.com"
headers = {"Content-Type": "application/x-www-form-urlencoded"}
cookies = {"Cookie": "sfdabifdsak=="}
data = {
    "username": "biboli",
    "password": "123",
    "tag": ["handsome", "tall", "wisdom"],
}
r = requests.post(url, headers=headers, cookies=cookies, data=data)
import json
import requests
r = requests.post("https://www.baidu.com", data=json.dumps({"some": "data"}))
"json"方法将数据进行序列化,使之成为一个序列化字符串,并发送到服务器端的body中,需要"Content-Type"是"application/json"
r = requests.post(url="https://www.baidu.com", json={"k1": "姓名", "k2": "biboli"})
"files"方法用来向目标接口传送文件,支持传送多个文件,也可以将字符串当作文件发送;尽量使用二进制打开文件,避免出错
url = "https://www.baidu.com"
headers = {"from": "indexPage"}
data = {"filename": "test.txt", "size": "9"}
files = {"test.txt": open(r"C:\999\users\test\test.txt", "rb")}
r = requests.post(url, headers=headers, data=data, files=files)
# 每个文件的元组格式:(文件名, 文件对象或字符串内容, content_type, 自定义头部),后两项可省略
r = requests.post(
    "https://www.baidu.com",
    headers={"from": "fdsafsa==", "site": "www.baidu.com"},
    data={"number": "3", "action": "save"},
    files={
        "file0": ("tag", "some,data,to,send\nanother,row,to,send\n"),
        "file1": ("hello", open(r"C:\you\hello.jpeg", "rb")),
        "file2": ("hellophoto", open(r"C:\you\hellophoto.txt", "rb"), "image/jpeg"),
        "file3": ("idcard", open(r"C:\you\idcard.jpeg", "rb"), "image/jpeg", {"Expires": "0", "referer": "localhost"}),
    },
)
"Session"方法能够在跨http请求后保持特定参数,也就是复用tcp。最常见的应用是将cookie保持在后续的请求中。使用时"Session"方法时,需要首先实例化session对象,以session对象发起后面的请求。注意"Session"方法,"S"要大写
r = requests.Session()
r.headers.update({"test1": "true"})
requests_A = r.get("https://www.baidu.com", headers={"test2": "true"})
requests_B = r.get("https://www.baidu.com")
print(requests_A.text)
print(requests_B.text)
#requests_A.text { "headers": { "Accept": "*/*", "Accept-Encoding": "gzip, deflate", "Host": "www.baidu.com", "User-Agent": "biboli", "test1": "true", "test2": "true" } } #requests_B.text { "headers": { "Accept": "*/*", "Accept-Encoding": "gzip, deflate", "Host": "www.baidu.com", "User-Agent": "biboli", "test1": "true" } }
"auth"方法用来与"HTTP Basic Auth"配合,同时也支持摘要式身份认证和"OAuth 1 Authentication",注意导入需要的模块
import requests
from requests.auth import HTTPDigestAuth
from requests_oauthlib import OAuth1
url = "https://www.baidu.com"
requests_A = requests.get(url, auth=("username", "password"))
requests_B = requests.get(url, auth=HTTPDigestAuth("username", "password"))
requests_C = requests.get(url, auth=OAuth1("YOUR_APP_KEY", "YOUR_APP_SECRET", "USER_OAUTH_TOKEN", "USER_OAUTH_TOKEN_SECRET"))
"timeout"方法可以使用一个浮点数同时设置连接超时时间和读取超时时间,也可以使用两个浮点数,分别设置连接超时时间和读取超时时间
requests_A = requests.get("https://www.baidu.com",timeout=1) #连接超时和读取超时均设置为1秒,超过则报超时错误
requests_B = requests.get("https://www.baidu.com",timeout=(1,1.9)) #设置1秒时间内没有连接到服务器,就报连接超时错误。或者读取内容时间大于1.9秒,就报读取超时错误
requests_B = requests.get("https://www.baidu.com",timeout=None)
"allow_redirects"方法用来禁用或启用重定向,requests会自动处理重定向
requests_A = requests.get("https://www.baidu.com",allow_redirects=False) requests_B = requests.get("https://www.baidu.com",allow_redirects=True)
"proxies"方法用来设置请求时使用的代理,可以为特定某个连接方式或主机单独设置代理,并且支持"HTTP Basic Auth"以及SOCKS协议
#python -m pip install requests[socks]
proxies = {
    "http": "https://username:password@8.210.136.170:3333",
    "https": "socks5://username:password@8.210.136.170:3335",
}
requests_A = requests.get("https://www.pornhub.com", proxies=proxies)
proxies = {"https://www.pornhub.com": "https://username:password@8.210.136.170:3335"}
requests_B = requests.get("https://www.pornhub.com", proxies=proxies)
"stream"方法用来获取原始套接字响应,也可以和"Response.iter_lines()"、"Response.iter_content(chunk_size)"相互配合,可以很方便的对流式API进行迭代
r = requests.get("https://www.baidu.com", stream=True)
r.raw #<urllib3.response.HTTPResponse object at 0x101194810>
r.raw.read(10) #'\x1f\x8b\x08\x00\x00\x00\x00\x00\x00\x03'
for line in r.iter_lines():
    if line:
        decoded_line = line.decode("utf-8")
        print(json.loads(decoded_line))
#Response.iter_lines()并不可靠,若需要多次调用,避免数据丢失,需要使用实例对象
lines = r.iter_lines()
first_line = next(lines)
for line in lines:
    print(line)
#Response.iter_content()用来块编码传输,参数可以为"None"默认大小块迭代传输,也可以为整数设定指定大小迭代块大小
"verify"方法用来指定证书文件目录,或者忽略SSL证书
requests_sll = requests.Session()
requests_sll.verify = "/path/to/certfile"
requests_A = requests_sll.get("https://www.baidu.com")
requests_B = requests_sll.get("https://www.baidu.com", verify=False)
"cert"方法用来指定本地证书文件或多个证书文件或私钥
requests_sll = requests.Session()
requests_sll.cert = "/path/client.cert"
requests_A = requests_sll.get("https://www.baidu.com")
requests_B = requests_sll.get("https://www.baidu.com", cert=("/path/client.cert", "/path/client.key"))
响应内容
r.encoding #查看响应编码
r.encoding = "gb2312" #设置响应解析编码
r.ok #返回布尔值,判断请求是否成功(状态码小于400)
r.status_code #返回状态码
r.headers #以字典存储服务器响应头,可以不区分大小写访问里面的键值,print(r.headers['content-type'])
r.cookies #如果包含某些cookie,可以直接访问"r.cookies["Set-Session"]"
r.history #重定向历史列表,从旧到新
r.text #以encoding解析返回内容,会自动根据响应头部的字符编码进行解码
r.json() #requests中内置的JSON解码器,以json形式返回响应内容,前提返回的内容确保是json格式的,不然解析出错会抛异常
r.content #以二进制形式返回响应体,会自动为你解码 gzip 和 deflate 压缩,可以和其他模块配合直接创建图像
r.raw #返回原始套接字响应体,需要"stream=True",使用r.raw或r.raw.read()
查询
r.request.headers #获取请求头内容
r.request.url #获取请求url
错误和异常
Response.raise_for_status() #用来诊断报错,其中"Timeout"是超时异常,"TooManyRedirects"是超过配置重定向数
#所有的异常内容,都继承自"requests.exceptions.RequestException"

浙公网安备 33010602011771号