Python模块 | "requests"

  "requests"模块,支持HTTP连接保持和链接池,自动实现持久连接"keep-alive"。支持使用cookie保持会话、文件上传等功能。自动响应内容编码,国际化URL和POST数据自动编码。使用"requests"模块可以轻而易举的完成浏览器可有的任何操作。但是"requests"模块发送请求,得到网页后,并不会执行"JS"代码,需要自己分析目标站点后,再发起新的"requests"请求

requests.get("https://github.com/timeline.json")                                   # GET请求
requests.post("https://www.baidu.com/post")                                        # POST请求
requests.put("https://www.baidu.com/put")                                          # PUT请求
requests.delete("https://www.baidu.com/delete")                                    # DELETE请求
requests.head("https://www.baidu.com/get")                                         # HEAD请求
requests.options("https://www.baidu.com/get")                                      # OPTIONS请求
requests.patch("https://www.baidu.com")                                            # PATCH请求

  无论哪种请求方式,它们的使用方法都是固定的,并且在发送基本的HTTP请求的同时,还可以附带多种参数传送数据,相互组合使用

requests.method(url[,parameters])
"params","headers","cookies","data","session","json","files","auth","timeout","proxies","allow_redirects","stream","verify","cert"
requests.post(url,cookies={"Session":"dfsjkalnfsa=="})

  "params"方法会将接收的字典默认自动编码,并发送到url中

r = requests.get("https://www.baidu.com",params={"id":"1"})
#https://www.baidu.com?id=1
url = "https://www.baidu.com"
params = {"id":"1", "value":"action"}
r = requests.post(url,params=params)
#https://www.baidu.com?id=1&value=action

  "headers"方法用来发送http请求头

r = requests.post(
url = "https://www.baidu.com",
headers = {"Content-Type":"application/x-www-form-urlencoded"}
)

  "cookies"方法用来发送cookie,支持CookieJar,也可以和"http.cookieJar"模块配合使用

cookie = {"Cookie":"fsaujfsdijn=="}
r = requests.post("https://www.baidu.com",cookies=cookie)

jar = requests.cookies.RequestsCookieJar()
jar.set("cookie_baidu","adsfjanjalj==",domain="www.baidu.com",path="/index")
jar.set("cookie_pornhub","fdafgbdaldj==",domain="www.pornhub.com",path="/index")

r = requests.get("https://www.baidu.com",cookies=jar)

  "data"方法用来发送post请求的实体主体,可以接收字典和文件对象,同时支持一键多值,并可以和"json"模块配合

url = "https://www.baidu.com"
headers = {"Content-Type":"application/x-www-form-urlencoded"}
cookies = {"Cookie":"sfdabifdsak=="}
data = {
            "username":"biboli",
            "password":"123",
            "tag":["handsome","tall","wisdom"]
}
r = requests.post(url,headers=headers,cookies=cookies,data=data)
import requests,json

r = requests.post("https://www.baidu.com",data=json.dumps({"some":"data"}))

  "json"方法将数据进行序列化,使之成为一个序列化字符串,并发送到服务器端的body中,需要"Content-Type"是"application/json"

r = requests.post(url = "https://www.baidu.com",
                     json = {"k1":"姓名","k2":"biboli"}
)

   "files"方法用来向目标接口传送文件,支持传送多个文件,也可以将字符串当作文件发送;尽量使用二进制打开文件,避免出错

url = "https://www.baidu.com"
headers = {"from":"indexPage"}
data = {"filename":"test.txt","size":"9"}
files = {"test.txt":open(r"C:\999\users\test\test.txt","rb")}

r = requests.post(url,headers=headers,data=data,files=files)
r = requests.post(
"https://www.baidu.com",
headers = {"from":"fdsafsa==","site":"www.baidu.com"},
data = {"number":"3","action":"save"},
files = {
#元组格式:(文件名, 文件对象或字符串内容, 内容类型, 自定义请求头),后两项可省略
#内容类型按文件实际类型填写,例如xls文件使用"application/vnd.ms-excel"
"file0":("tag","some,data,to,send\nanother,row,to,send\n"),
"file1":("hello",open(r"C:\you\hello.jpeg","rb")),
"file2":("hellophoto",open(r"C:\you\hellophoto.txt","rb"),"image/jpeg"),
"file3":("idcard",open(r"C:\you\idcard.jpeg","rb"),"image/jpeg",{"Expires":"0","referer":"localhost"})
}
)

   "Session"方法能够在跨http请求后保持特定参数,也就是复用tcp。最常见的应用是将cookie保持在后续的请求中。使用时"Session"方法时,需要首先实例化session对象,以session对象发起后面的请求。注意"Session"方法,"S"要大写

r = requests.Session()
r.headers.update({"test1":"true"})

requests_A = r.get("https://www.baidu.com",headers = {"test2":"true"})
requests_B = r.get("https://www.baidu.com")

print(requests_A.text)
print(requests_B.text)
#requests_A.text
{
  "headers": {
    "Accept": "*/*", 
    "Accept-Encoding": "gzip, deflate", 
    "Host": "www.baidu.com", 
    "User-Agent": "biboli", 
    "test1": "true", 
    "test2": "true"
  }
}
#requests_B.text
{
  "headers": {
    "Accept": "*/*", 
    "Accept-Encoding": "gzip, deflate", 
    "Host": "www.baidu.com", 
    "User-Agent": "biboli", 
    "test1": "true"
  }
}

   "auth"方法用来与"HTTP Basic Auth"配合,同时也支持摘要是身份认证和"OAuth 1 Authentication",注意导入需要的模块

import requests
from requests.auth import HTTPDigestAuth
from requests_oauthlib import OAuth1

url = "https://www.baidu.com"

requests_A = requests.get(url,auth=("username","password"))
requests_B = requests.get(url,auth=HTTPDigestAuth("username","password"))
requests_C = requests.get(url,auth=OAuth1("YOUR_APP_KEY","YOUR_APP_SECRET","USER_OAUTH_TOKEN","USER_OAUTH_TOKEN_SECRET"))

   "timeout"方法可以使用一个浮点数同时设置连接超时时间和读取超时时间,也可以使用两个浮点数,分别设置连接超时时间和读取超时时间

requests_A = requests.get("https://www.baidu.com",timeout=1)
#设置1秒时间内没有连接到服务器,就报连接超时错误
requests_B = requests.get("https://www.baidu.com",timeout=(1,1.9))
#设置1秒时间内没有连接到服务器,就报连接超时错误。或者读取内容时间大于1.9秒,就报读取超时错误
requests_B = requests.get("https://www.baidu.com",timeout=None)

   "allow_redirects"方法用来禁用或启用重定向,requests会自动处理重定向

requests_A = requests.get("https://www.baidu.com",allow_redirects=False)
requests_B = requests.get("https://www.baidu.com",allow_redirects=True)

   "proxies"方法用来设置请求时使用的代理,可以为特定某个连接方式或主机单独设置代理,并且支持"HTTP Basic Auth"以及SOCKS协议

#python -m pip install requests[socks]

proxies = {
    "http":"https://username:password@8.210.136.170:3333",
    "https":"socks5://username:password@8.210.136.170:3335"
}
requests_A = requests.get("https://www.pornhub.com",proxies=proxies)

proxies = {"https://www.pornhub.com":"https://username:password@8.210.136.170:3335"}
requests_B = requests.get("https://www.pornhub.com",proxies=proxies)

   "stream"方法用来获取原始套接字响应,也可以和"Response.iter_lines()"、"Response.iter_content(chunke_size)"相互配合,可以很方便的对流式API进行迭代

r = requests.get("https://www.baidu.com",stream=True)

r.raw
#<urllib3.response.HTTPResponse object at 0x101194810>

r.raw.read(10)
#'\x1f\x8b\x08\x00\x00\x00\x00\x00\x00\x03'

for line in r.iter_lines():
    if line:
        decoded_line = line.decode("utf-8")
        print(json.loads(decoded_line))

#Response.iter_lines()并不可靠,若需要多次调用,避免数据丢失,需要使用实例对象

lines = r.iter_lines()
first_line = next(lines)
for line in lines:
  print(line)

#Response.iter_content()用来按块迭代响应内容,参数可以为"None"按默认大小分块迭代,也可以为整数,指定每次迭代的块大小

   "verify"方法用来指定证书文件目录,或者忽略SSL证书

requests_sll = requests.Session()

requests_sll.verify = "/path/to/certfile"
requests_A = requests_sll.get("https://www.baidu.com")

requests_B = requests_sll.get("https://www.baidu.com",verify=False)

   "cert"方法用来指定本地证书文件或多个证书文件或私钥

requests_sll = requests.Session()

requests_sll.cert = "/path/client.cert"
requests_A = requests_sll.get("https://www.baidu.com")

requests_B = requests_sll.get("https://www.baidu.com",cert=("/path/client.cert","/path/client.key"))

  响应内容

r.encoding                       #查看响应编码
r.encoding = "gb2312"            #设置响应解析编码
r.ok                             #返回布尔值,判断请求是否成功(状态码小于400)
r.status_code                    #返回状态码
r.headers                        #以字典存储服务器响应头,可以不区分大小写访问里面的键值,print(r.headers['content-type'])
r.cookies               #如果包含某些cookie,可以直接访问"r.cookies["Set-Session"]"
r.history                        #重定向历史列表,从旧到新
r.text                           #以encoding解析返回内容,会自动根据响应头部的字符编码进行解码
r.json()                         #requests中内置的JSON解码器,以json形式返回响应内容,前提返回的内容确保是json格式的,不然解析出错会抛异常
r.content                        #以二进制形式返回响应体,会自动为你解码 gzip 和 deflate 压缩,可以和其他模块配合直接创建图像
r.raw                            #返回原始套接字响应体,需要"stream=True",使用r.raw或r.raw.read()

  查询

r.request.headers               #获取请求头内容
r.request.url                   #获取请求url

  错误和异常

Response.raise_for_status()     #用来诊断报错,响应状态码为4xx或5xx时抛出"HTTPError";其中"Timeout"是超时异常,"TooManyRedirects"是超过配置重定向数

#所有的异常内容,都继承自"requests.exceptions.RequestException"
posted @ 2020-09-26 15:42  biboli  阅读(542)  评论(1)    收藏  举报