urllib3 是 Python 中一个 HTTP 客户端库,提供了连接池、线程安全、重试机制等高级功能。
访问网页流程
![]()
urllib3的安装(pip install urllib3)
发送GET请求
import urllib3

# A PoolManager is the entry point for sending requests.
http = urllib3.PoolManager()
# Use the canonical uppercase HTTP method name, like the other examples.
response = http.request("GET", "https://www.baidu.com")
print(response.status)  # HTTP status code
print(response.data.decode())  # response body decoded to str (UTF-8 by default)
print(response.headers)  # response headers
发送POST请求
import json

import urllib3

http = urllib3.PoolManager()
payload = {"name": "John", "age": "30"}

# POST form data. With `fields`, urllib3 builds a multipart/form-data body by
# default, which would contradict a hand-written urlencoded Content-Type
# header. encode_multipart=False URL-encodes the fields instead and sets the
# matching "application/x-www-form-urlencoded" header automatically.
response = http.request(
    "POST",
    "https://httpbin.org/post",
    fields=payload,
    encode_multipart=False,
)
print(response.status)

# POST JSON data. `fields` never produces a JSON body, so serialize the
# payload explicitly and send it as the raw request body together with a
# JSON Content-Type header.
response = http.request(
    "POST",
    "https://httpbin.org/post",
    body=json.dumps(payload).encode("utf-8"),
    headers={"Content-Type": "application/json"},
)
print(response.status)
设置请求超时
import urllib3

http = urllib3.PoolManager()
# Set a request timeout: a float applies the same limit (here 3 seconds) to
# both the connect phase and the read phase. The /get endpoint is used because
# /post does not accept GET requests.
response = http.request("GET", "https://httpbin.org/get", timeout=3.0)
print(response.status)
重试机制Retry
import urllib3
from urllib3.util import Retry

# Allow up to 3 retries per request. backoff_factor=0.5 scales urllib3's
# exponential backoff between attempts; it is not a fixed 0.5 s pause.
retries = Retry(
    total=3,
    backoff_factor=0.5,
)
# Every request made through this manager uses the retry policy above.
http = urllib3.PoolManager(retries=retries)
使用代理
import urllib3

# Send requests through the proxy at localhost:3128.
http = urllib3.ProxyManager("http://localhost:3128/")
response = http.request("GET", "http://httpbin.org/ip")
# Decode the raw response bytes before printing them.
body_text = response.data.decode()
print(body_text)
ssl认证
import urllib3

# Disable certificate verification (not recommended in production).
http1 = urllib3.PoolManager(
    cert_reqs="CERT_NONE",
)
# Use a custom CA certificate file instead.
http2 = urllib3.PoolManager(
    ca_certs="/path/to/cert.pem",
)
json响应
import json

import urllib3

http = urllib3.PoolManager()
# Send a real JSON body: `fields` would be encoded as multipart/form-data and
# contradict the JSON Content-Type header, so serialize the payload explicitly.
response = http.request(
    "POST",
    "https://httpbin.org/post",
    body=json.dumps({"name": "John", "age": "30"}).encode("utf-8"),
    headers={"Content-Type": "application/json"},
)
# Decode the response bytes to text and parse the JSON document.
data = json.loads(response.data.decode())
二进制内容
import urllib3

http = urllib3.PoolManager()
response = http.request("GET", "https://httpbin.org/image/png")
# response.data holds the raw bytes, so the file is opened in binary mode.
# The write must be indented inside the `with` block to be valid Python.
with open("image.png", "wb") as f:
    f.write(response.data)
异常处理
import urllib3

http = urllib3.PoolManager()
try:
    response = http.request("GET", "https://httpbin.org/image/png")
except urllib3.exceptions.HTTPError as e:
    # HTTPError is the base class of urllib3's own exceptions, so this
    # handler covers connection, timeout and retry-exhaustion failures.
    print("请求失败:", e)