requests【请求库】
""" python请求库的使用-----requests requests源于urllib3,Apache2协议 requests库的使用思路: 1.导入requests包 2.响应头response=requests.get(url,headers,params) 传递url参数,params={'key1':'value','key2':'value'} 3.返回响应头内容及编码raise_for_status response.encoding = apparent_encoding response.content(二进制内容) response.text response.json()返回json格式 response.url传递的url 编码也可以用codecs模块 4.原始响应内容: r=requests.get(url,stream=True) r.raw r.raw.read()---二进制数据 5.保存文本流保存到文件: with open(filename,'wb') as fd: for chunk in r.iter_content(chunk_size): fd.write(chunk) 6.上传: url = 'http://httpbin.org/post' files = {'file': open('report.xls', 'rb')} r = requests.post(url, files=files) 7.重定向 r = requests.get('http://',allow_redirects=False) r.status_code r.history ---- 查看结果 8.超时 r = requests.get('http://',timeout=1) 9.会话对象 s = requests.Session() s.get('http://') r = s.get("http://") (2.会话也可以用来为请求方法提供缺省数据 s = requests.Session() s.auth = ('user','pass') s.headers.update({'x-test':'true'}) #both 'x-test' and 'x-test2' are sent s.get('http://',headers={'x-test2':'true'}) (3.cookie上传 s = requests.Session() r = s.get(url,cookies={'from-my':'browser'}) print(r.text) with requests.Session() as s: s.get(url) 10.准备的请求 from requests import Request,Session s = Session() req = Request('GET',url,data=data,headers=header) prepped = req.prepare() resp = s.send(prepped,stream=stream,verify=verify,proxies=proxies,cert=cert,timeout=timeout) print(resp.status_code) #### from requests import Request,Session s = Session() req = Request('GET',url,data=data,headers=headers) prepped = s.prepare_request(req) resp = s.send(prepped,stream=stream,verify=verify,proxies=proxies,cert=cert,timeout=timeout) print(resp.status_code) 11.SSL证书验证 requests.get(url,verify=True) requests.get(url,cert=('/path/client.cert','/path/client.key')) s = requests.Session() s.cert = '/path/client.cert' 12.流式上传 with open('massive-body') as f: requests.post('https://',data=f) 13.POST多个分块编码的文件 url = 'http://httpbin.org/post' #(form_field_name,file_info) multiple_files 
=[('images',('foo.png',open('foo.png','rb'),'image/png')),('images',('bar.png',open('bar.png','rb'),'image/png'))] 14.事件钩子 def print_url(r,*args,**kwargs): print(r.url) requests.get(url,hooks=dict(response=print_url)) 15.自定义身份验证 from requests.auth import AuthBase class PizzaAuth(AuthBase): def __init__(self,username): self.username = username def __call__(self,r): r.headers['X-pizza'] = self.username return r 然后我们就可以使用PizzaAuth来进行网络请求: requests.get(url,auth=PizzaAuth('ZYL')) 16.流式请求 import json import requests r = requests.get(url,stream=True) for line in r.iter_lines(): if line: decoded_line = line.decode('utf-8') print(json.loads(decoded_line)) ####### r = requests.get(url,stream=True) if r.encoding is None: r.encoding = 'utf-8' for line in r.iter_lines(decode_unicode=True): if line: print(json.loads(line)) """
requests的get请求
# See demand.py for the reference approach.
import requests
from requests import codes


def get_html(url):
    """Fetch *url* and return its HTML text, saving a copy to ``demo.html``.

    The request is sent through a short-lived ``requests.Session`` with a
    desktop-browser User-Agent and a 1-second timeout. The response encoding
    is replaced by the encoding detected from the body before the text is
    decoded.

    Args:
        url: Address of the page to download.

    Returns:
        The decoded page text when the server answers with status 200,
        otherwise ``None``. ``None`` is also returned (after printing a
        failure message) when the request or the file write fails.
    """
    # Browser-like request header.
    headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36"}
    try:
        # The context manager closes the session's pooled connections even
        # when the request raises.
        with requests.Session() as session:
            response = session.get(url, headers=headers, timeout=1)
            # Turn 4xx/5xx answers into an exception.
            response.raise_for_status()
            # Decode using the encoding guessed from the body.
            response.encoding = response.apparent_encoding
            # Only a plain 200 counts as success.
            if response.status_code != codes.ok:
                return None
            html = response.text
        with open("demo.html", "w", encoding="utf-8") as f:
            f.write(html)
        return html
    except (requests.RequestException, OSError):
        # Catch only request and file-system failures; a bare ``except`` would
        # also swallow KeyboardInterrupt/SystemExit and hide programming errors.
        print("失败!")
        return None


def main():
    """Prompt for a URL, fetch it with ``get_html`` and print the result."""
    url = input("请输入要爬取的网站地址:")
    html = get_html(url)
    print(html)


if __name__ == "__main__":
    main()
"""python请求库的使用-----requests
requests源于urllib3,Apache2协议
requests库的使用思路:
1.导入requests包
2.响应头response=requests.get(url,headers,params) 传递url参数,params={'key1':'value','key2':'value'}
3.返回响应头内容及编码raise_for_status response.encoding = apparent_encoding response.content(二进制内容) response.text response.json()返回json格式 response.url传递的url 编码也可以用codecs模块
4.原始响应内容: r=requests.get(url,stream=True) r.raw r.raw.read()---二进制数据
5.保存文本流保存到文件: with open(filename,'wb') as fd: for chunk in r.iter_content(chunk_size): fd.write(chunk)
6.上传: url = 'http://httpbin.org/post' files = {'file': open('report.xls', 'rb')} r = requests.post(url, files=files)
7.重定向 r = requests.get('http://',allow_redirects=False) r.status_code r.history ---- 查看结果
8.超时 r = requests.get('http://',timeout=1)
9.会话对象s = requests.Session()s.get('http://')r = s.get("http://")
(2.会话也可以用来为请求方法提供缺省数据s = requests.Session()s.auth = ('user','pass')s.headers.update({'x-test':'true'})#both 'x-test' and 'x-test2' are sents.get('http://',headers={'x-test2':'true'})
(3.cookie上传s = requests.Session()r = s.get(url,cookies={'from-my':'browser'})print(r.text)
with requests.Session() as s: s.get(url)
10.准备的请求from requests import Request,Session
s = Session()req = Request('GET',url,data=data,headers=header)prepped = req.prepare()resp = s.send(prepped,stream=stream,verify=verify,proxies=proxies,cert=cert,timeout=timeout)
print(resp.status_code)
####from requests import Request,Session
s = Session()req = Request('GET',url,data=data,headers=headers)prepped = s.prepare_request(req)resp = s.send(prepped,stream=stream,verify=verify,proxies=proxies,cert=cert,timeout=timeout)print(resp.status_code)
11.SSL证书验证requests.get(url,verify=True)requests.get(url,cert=('/path/client.cert','/path/client.key'))
s = requests.Session()s.cert = '/path/client.cert'
12.流式上传
with open('massive-body') as f: requests.post('https://',data=f)
13.POST多个分块编码的文件
url = 'http://httpbin.org/post'#(form_field_name,file_info)multiple_files =[('images',('foo.png',open('foo.png','rb'),'image/png')),('images',('bar.png',open('bar.png','rb'),'image/png'))]
14.事件钩子def print_url(r,*args,**kwargs): print(r.url)
requests.get(url,hooks=dict(response=print_url))
15.自定义身份验证from requests.auth import AuthBase
class PizzaAuth(AuthBase):
    def __init__(self,username):
        self.username = username
    def __call__(self,r):
        r.headers['X-pizza'] = self.username
        return r
然后我们就可以使用PizzaAuth来进行网络请求:
requests.get(url,auth=PizzaAuth('ZYL'))
16.流式请求
import json
import requests
r = requests.get(url,stream=True)
for line in r.iter_lines():
    if line:
        decoded_line = line.decode('utf-8')
        print(json.loads(decoded_line))
#######
r = requests.get(url,stream=True)
if r.encoding is None:
    r.encoding = 'utf-8'
for line in r.iter_lines(decode_unicode=True):
    if line:
        print(json.loads(line))
"""

浙公网安备 33010602011771号