Python内置的urllib模块,用于访问网络资源。但是,它用起来比较麻烦,而且,缺少很多实用的高级功能。更好的方案是使用requests。它是一个Python第三方库,处理URL资源特别方便。
# Basic GET request: fetch a page and inspect the Response object.
import requests

resp = requests.get('http://www.baidu.com/')
print(type(resp))            # requests.models.Response
print(resp.status_code)      # HTTP status code, e.g. 200
print(type(resp.text))       # decoded body is a str
print(resp.text)
print(resp.cookies)          # RequestsCookieJar set by the server
# The various request methods: requests exposes one helper per HTTP verb.
import requests

requests.post('http://httpbin.org/post')
requests.put('http://httpbin.org/put')
requests.delete('http://httpbin.org/delete')
requests.head('http://httpbin.org/get')
requests.options('http://httpbin.org/get')
# Basic GET request against httpbin's echo endpoint.
import requests

r = requests.get('http://httpbin.org/get')
print(r.text)
# GET request with parameters embedded directly in the query string.
import requests

r = requests.get('http://httpbin.org/get?name=germey&age=22')
print(r.text)
import requests

# Same query as above, but let requests build the query string from a dict.
query = {'name': 'germey', 'age': 22}
r = requests.get('http://httpbin.org/get', params=query)
print(r.text)
# Parsing JSON: response.json() is equivalent to json.loads(response.text).
import requests
import json

r = requests.get('http://httpbin.org/get')
print(type(r.text))           # str
print(r.json())               # parsed dict
print(json.loads(r.text))     # same result via the json module
print(type(r.json()))         # dict
# Binary data: .text is the decoded str, .content is the raw bytes.
import requests

r = requests.get('http://github.com/favicon.ico')
print(type(r.text), type(r.content))
print(r.text)
print(r.content)
# Save an image to disk. The with-statement closes the file automatically,
# so the explicit f.close() the original called inside the with block was
# redundant and has been removed.
import requests

response = requests.get('http://github.com/favicon.ico')
with open('1.ico', 'wb') as f:
    f.write(response.content)
# Headers demo: without a User-Agent this request is expected to fail
# (the site rejects the default requests client).
import requests

r = requests.get('http://www.zhihu.com/explore')
print(r.text)
import requests

# Supplying a browser User-Agent makes the same request succeed.
ua_headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.102 Safari/537.36'
}
r = requests.get('http://zhihu.com/explore', headers=ua_headers)
print(r.text)
# Basic POST request with a form-encoded body.
import requests

payload = {'name': 'germey', 'age': 22}
r = requests.post('http://httpbin.org/post', data=payload)
print(r.text)
import requests

# POST with both a form body and custom headers.
payload = {'name': 'germey', 'age': 22}
ua_headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.102 Safari/537.36'
}
r = requests.post('http://httpbin.org/post', data=payload, headers=ua_headers)
print(r.json())
# Common Response attributes and their types.
import requests

r = requests.get('http://www.jianshu.com')
print(type(r.status_code), r.status_code)
print(type(r.headers), r.headers)
print(type(r.cookies), r.cookies)
print(type(r.url), r.url)
print(type(r.history), r.history)   # redirect chain, if any
# File upload. Open the file in a with-block so the handle is closed even
# if the request raises (the original opened the file inline and leaked
# the handle).
import requests

with open('1.ico', 'rb') as fp:
    files = {'file': fp}
    response = requests.post('http://httpbin.org/post', files=files)
print(response.text)
# Reading cookies: the jar behaves like a dict of name/value pairs.
import requests

r = requests.get('http://www.baidu.com')
print(r.cookies)
for name, val in r.cookies.items():
    print(name + ' = ' + val)
# Session persistence / simulated login. This first example makes two
# independent requests (like two different browsers), so the cookie set by
# the first request is NOT visible to the second — use a Session instead.
import requests

requests.get('http://httpbin.org/cookies/set/number/123456789')
response = requests.get('http://httpbin.org/cookies')
print(response.text)
import requests

# A Session shares cookies across requests, so the cookie set below is
# visible on the second call. Note: requests.Session() is the documented
# class; the lowercase requests.session() used originally is a deprecated
# alias.
s = requests.Session()
s.get('http://httpbin.org/cookies/set/number/123456789')
response = s.get('http://httpbin.org/cookies')
print(response.text)
# Certificate verification: this HTTPS request uses the default verify=True.
import requests

r = requests.get('https://www.12306.cn')
print(r.status_code)
import requests
from requests.exceptions import ConnectTimeout, HTTPError, ReadTimeout, RequestException
from requests.auth import HTTPBasicAuth
import urllib3

# Certificate verification.
# Silence the InsecureRequestWarning that unverified HTTPS requests emit.
urllib3.disable_warnings()
# verify=False skips TLS certificate verification (the default is True).
response = requests.get('https://www.12306.cn', verify=False)
print(response.status_code)
# Proxy configuration: one entry per URL scheme.
proxies = {
    "http": "http://127.0.0.1:9743",
    "https": "https://127.0.0.1:9743",
}
response = requests.get("https://www.taobao.com", proxies=proxies)
print(response.status_code)
# Proxy that requires credentials: embed user:password in the proxy URL.
proxies = {
    "http": "http://user:password@127.0.0.1:9743/",
}
response = requests.get("https://www.taobao.com", proxies=proxies)
print(response.status_code)
# SOCKS5 proxy (requires the requests[socks] extra to be installed).
proxies = {
    "http": "socks5://127.0.0.1:9742",
    "https": "socks5://127.0.0.1:9742",
}
response = requests.get("https://www.taobao.com", proxies=proxies)
print(response.status_code)
# Timeout: a 0.2 s limit applies to both the connect and the read phase,
# which raise ConnectTimeout and ReadTimeout respectively — catch both.
# (The original caught only ConnectTimeout, so a slow read still crashed.)
try:
    response = requests.get("http://httpbin.org/get", timeout=0.2)
    print(response.status_code)
except (ConnectTimeout, ReadTimeout):
    print("timeout!")
# HTTP basic auth — both forms below are equivalent.
response = requests.get("http://120.27.34.24:9001", auth=HTTPBasicAuth("user", "123"))
print(response.status_code)

# A plain (user, password) tuple is shorthand for HTTPBasicAuth.
response = requests.get("http://120.27.34.24:9001", auth=("user", "123"))
print(response.status_code)
# Exception handling: except clauses are tried in order, so subclasses must
# come before their base class. The original listed RequestException (the
# base of HTTPError and ReadTimeout) first, which made those later handlers
# unreachable — reordered narrowest-first here.
try:
    response = requests.get("http://httpbin.org/get", timeout=0.2)
    print(response.status_code)
except ConnectTimeout:
    print("timeout!")
except ReadTimeout:
    print("ReadTimeout")
except HTTPError:
    print("HttpError!")
except RequestException:
    print("RequestException!")
import requests

r = requests.get("http://www.baidu.com")
print(r.cookies)
print("----------")
# Convert the cookie jar into a plain dict.
cookie_dict = requests.utils.dict_from_cookiejar(r.cookies)
print(cookie_dict)
print("----------")
# And convert the dict back into a cookie jar.
print(requests.utils.cookiejar_from_dict(cookie_dict))
# URL decoding/encoding via requests.utils (thin wrappers over urllib.parse).
# This snippet starts a fresh file, so requests must be imported here — the
# original called requests.utils without any import.
import requests

# URL-decode a GB18030 percent-encoded query string.
print(requests.utils.unquote("http://tieba.baidu.com/f?kw=%D2%D7%D3%EF%D1%D4&fr=ala0&tpl=5", encoding="gb18030"))
# URL-encode.
print(requests.utils.quote("http://tieba.baidu.com/f?kw=%D2%D7%D3%EF%D1%D4&fr=ala0&tpl=5"))