Python - httpx

Python httpx 库入门教程:现代HTTP客户端的基础用法

(AI写的,复制过来当笔记)

一、httpx 简介

1.1 什么是 httpx?

httpx 是一个功能齐全的HTTP客户端库,支持同步和异步API。它是对 requests 库的现代化替代,提供了更好的性能和更多的功能。

1.2 为什么选择 httpx?

  • 支持 HTTP/1.1 和 HTTP/2
  • 同步和异步支持:一套API,两种用法
  • 类型注解:更好的IDE支持
  • 连接池:提高性能
  • 超时控制:更精细的控制选项
  • WebSocket:httpx 本身不内置,需要借助第三方库(如 httpx-ws)
  • 更快的性能

1.3 安装 httpx

# Basic install
pip install httpx

# With HTTP/2 support (quoted so shells such as zsh do not glob the brackets)
pip install 'httpx[http2]'

# With other optional extras: httpx has no "all" extra, so list the ones you
# need (Brotli decoding, the command-line client, SOCKS proxy support)
pip install 'httpx[brotli,cli,socks]'

二、基本用法

2.1 同步请求

import httpx

# 最简单的 GET 请求
response = httpx.get('https://httpbin.org/get')
print(f"状态码: {response.status_code}")
print(f"响应内容: {response.text}")

# 带参数的 GET 请求
params = {'key1': 'value1', 'key2': 'value2'}
response = httpx.get('https://httpbin.org/get', params=params)
print(f"请求URL: {response.url}")
print(f"响应内容: {response.text}")

2.2 异步请求

import asyncio
import httpx

async def main():
    """Send one asynchronous GET request and print its status and a body preview."""
    async with httpx.AsyncClient() as session:
        reply = await session.get('https://httpbin.org/get')
        print(f"状态码: {reply.status_code}")
        # Only the first 100 characters of the body are shown
        preview = reply.text[:100]
        print(f"响应内容: {preview}...")

# 运行异步函数
asyncio.run(main())

2.3 查看响应内容

import httpx

response = httpx.get('https://httpbin.org/get')

# 各种查看响应内容的方式
print(f"状态码: {response.status_code}")
print(f"状态文本: {response.reason_phrase}")
print(f"HTTP版本: {response.http_version}")
print(f"响应头: {response.headers}")
print(f"响应文本: {response.text}")          # 文本内容
print(f"响应内容: {response.content}")       # 字节内容
print(f"响应JSON: {response.json()}")        # JSON响应(如果是JSON)

# 获取特定响应头
print(f"Content-Type: {response.headers['content-type']}")
print(f"Content-Length: {response.headers.get('content-length', '未知')}")

三、不同类型的请求

3.1 GET 请求

import httpx

# 基本GET请求
response = httpx.get('https://httpbin.org/get')

# 带查询参数的GET请求
params = {
    'name': '张三',
    'age': 25,
    'city': '北京'
}
response = httpx.get('https://httpbin.org/get', params=params)

# 设置请求头
headers = {
    'User-Agent': 'Mozilla/5.0',
    'Accept': 'application/json',
    'Authorization': 'Bearer token123'
}
response = httpx.get('https://httpbin.org/get', headers=headers)

3.2 POST 请求

import httpx

# 发送表单数据
data = {
    'username': 'admin',
    'password': '123456',
    'remember': True
}
response = httpx.post('https://httpbin.org/post', data=data)

# 发送JSON数据
json_data = {
    'name': '李四',
    'email': 'lisi@example.com',
    'age': 30
}
response = httpx.post('https://httpbin.org/post', json=json_data)

# Upload a file; the with-block guarantees the handle is closed once the
# request has been sent, even if the request raises
with open('example.txt', 'rb') as upload:
    files = {'file': upload}
    response = httpx.post('https://httpbin.org/post', files=files)

3.3 其他HTTP方法

import httpx

# PUT 请求
response = httpx.put('https://httpbin.org/put', json={'key': 'value'})

# DELETE 请求
response = httpx.delete('https://httpbin.org/delete')

# PATCH 请求
response = httpx.patch('https://httpbin.org/patch', json={'key': 'value'})

# HEAD 请求(只获取响应头)
response = httpx.head('https://httpbin.org/get')

# OPTIONS 请求(获取服务器支持的HTTP方法)
response = httpx.options('https://httpbin.org')

四、使用Client会话

4.1 同步Client

import httpx

# 使用上下文管理器创建Client(推荐)
with httpx.Client() as client:
    # 设置基础URL
    client.base_url = 'https://httpbin.org'
    
    # 设置默认请求头
    client.headers.update({
        'User-Agent': 'MyApp/1.0',
        'Accept': 'application/json'
    })
    
    # 发送多个请求,共用同一个连接池
    response1 = client.get('/get')
    response2 = client.post('/post', json={'data': 'test'})
    response3 = client.get('/headers')

# Client会自动关闭连接

# 手动创建和关闭Client
client = httpx.Client()
try:
    response = client.get('https://httpbin.org/get')
finally:
    client.close()

4.2 异步Client

import asyncio
import httpx

async def main():
    """Send several GET requests concurrently over one shared AsyncClient.

    Each response's final URL and status code are printed in the same order
    as the ``urls`` list.
    """
    async with httpx.AsyncClient() as client:
        # Default configuration applied to every request made by this client
        client.timeout = httpx.Timeout(10.0)
        client.headers.update({'User-Agent': 'MyAsyncApp/1.0'})

        # Every endpoint here must accept GET: httpbin's /post only allows
        # POST and would answer a GET with 405 Method Not Allowed.
        urls = [
            'https://httpbin.org/get',
            'https://httpbin.org/user-agent',
            'https://httpbin.org/headers'
        ]

        # Create the coroutines first, then let gather() run them
        # concurrently; results come back in input order.
        tasks = [client.get(url) for url in urls]
        responses = await asyncio.gather(*tasks)

        for i, response in enumerate(responses, start=1):
            print(f"URL {i}: {response.url}")
            print(f"状态码: {response.status_code}")

asyncio.run(main())

五、超时和错误处理

5.1 设置超时

import httpx
from httpx import Timeout

# 全局超时
response = httpx.get('https://httpbin.org/delay/5', timeout=10.0)

# 更精细的超时控制
timeout = Timeout(
    connect=5.0,      # 连接超时
    read=10.0,        # 读取超时
    write=5.0,        # 写入超时
    pool=1.0          # 连接池超时
)

response = httpx.get('https://httpbin.org/delay/3', timeout=timeout)

# 使用Client设置默认超时
with httpx.Client(timeout=timeout) as client:
    response = client.get('https://httpbin.org/delay/2')

5.2 错误处理

import httpx
from httpx import RequestError, TimeoutException, HTTPStatusError

try:
    # 尝试访问一个可能不存在的URL
    response = httpx.get('https://httpbin.org/status/404')
    response.raise_for_status()  # 如果状态码不是2xx,会抛出异常
    
except TimeoutException as e:
    print(f"请求超时: {e}")
    
except HTTPStatusError as e:
    print(f"HTTP错误: {e.response.status_code}")
    print(f"错误信息: {e.response.text}")
    
except RequestError as e:
    print(f"请求错误: {e}")
    
except Exception as e:
    print(f"其他错误: {e}")

六、处理响应

6.1 处理JSON响应

import httpx

response = httpx.get('https://httpbin.org/json')

# 方法1:直接调用.json()方法
data = response.json()
print(f"标题: {data.get('slideshow', {}).get('title', '未知')}")

# 方法2:使用try-except处理JSON解析错误
try:
    data = response.json()
except ValueError as e:
    print(f"JSON解析失败: {e}")
    data = None

# 检查响应是否为JSON
if 'application/json' in response.headers.get('content-type', ''):
    data = response.json()

6.2 处理二进制响应

import httpx

# 下载图片
response = httpx.get('https://httpbin.org/image/png')

if response.status_code == 200:
    # 保存到文件
    with open('image.png', 'wb') as f:
        f.write(response.content)
    
    # 检查文件大小
    print(f"下载了 {len(response.content)} 字节的数据")
    print(f"Content-Type: {response.headers.get('content-type')}")

6.3 流式响应

import httpx

# 下载大文件
url = 'https://httpbin.org/stream-bytes/1000000'  # 1MB的数据

with httpx.stream('GET', url) as response:
    total_bytes = 0
    
    # 逐块读取
    for chunk in response.iter_bytes():
        total_bytes += len(chunk)
        print(f"已接收: {total_bytes} 字节", end='\r')
    
    print(f"\n总共接收: {total_bytes} 字节")

# Control the chunk size: chunk_size is an argument of iter_bytes(), not of
# httpx.stream() (passing it there raises TypeError)
with httpx.stream('GET', url) as response:
    for chunk in response.iter_bytes(chunk_size=8192):
        # Process each chunk here
        pass

七、高级配置

7.1 使用代理

import httpx

# HTTP proxy: route all traffic through a single proxy.
# The old `proxies=` argument was deprecated in httpx 0.26 and removed in
# 0.28; use `proxy=` instead.
proxy_url = 'http://localhost:8080'

response = httpx.get('https://httpbin.org/ip', proxy=proxy_url)

# SOCKS5 proxy (requires the socks extra)
# pip install httpx[socks]
socks_proxy_url = 'socks5://localhost:9050'

# Configure the proxy once on a Client so every request uses it
with httpx.Client(proxy=socks_proxy_url) as client:
    response = client.get('https://httpbin.org/ip')

# Different proxies per URL scheme: mount one transport per URL pattern
mounts = {
    'http://': httpx.HTTPTransport(proxy='http://localhost:8080'),
    'https://': httpx.HTTPTransport(proxy='http://localhost:8080'),
}

with httpx.Client(mounts=mounts) as client:
    response = client.get('https://httpbin.org/ip')

7.2 认证

import httpx
from httpx import BasicAuth, DigestAuth

# Basic auth: the credentials must match the ones encoded in the httpbin URL
# (/basic-auth/{user}/{passwd}), otherwise the server answers 401
auth = BasicAuth('user', 'pass')
response = httpx.get('https://httpbin.org/basic-auth/user/pass', auth=auth)

# Digest auth: same rule for /digest-auth/{qop}/{user}/{passwd}
auth = DigestAuth('user', 'pass')
response = httpx.get('https://httpbin.org/digest-auth/auth/user/pass', auth=auth)

# Bearer Token
headers = {'Authorization': 'Bearer your_token_here'}
response = httpx.get('https://httpbin.org/bearer', headers=headers)

# API Key
params = {'api_key': 'your_api_key_here'}
response = httpx.get('https://api.example.com/data', params=params)

7.3 Cookies

import httpx

# 发送Cookies
cookies = {'session_id': 'abc123', 'user_id': '456'}
response = httpx.get('https://httpbin.org/cookies', cookies=cookies)

# 接收Cookies
response = httpx.get('https://httpbin.org/cookies/set?name=value')
print(f"收到的Cookies: {response.cookies}")

# 使用Client管理Cookies
with httpx.Client(cookies=cookies) as client:
    # 所有请求都会自动带上这些Cookies
    response = client.get('https://httpbin.org/cookies')
    
    # 更新Cookies
    client.cookies.set('new_cookie', 'new_value')
    
    response = client.get('https://httpbin.org/cookies')

八、实战示例

8.1 简单的API客户端

import httpx
from typing import Optional, Dict, Any

class APIClient:
    """Small JSON API client for a ``/users`` resource, built on ``httpx.Client``.

    The instance owns one ``httpx.Client`` and can be used as a context
    manager; leaving the ``with`` block closes that client.
    """

    def __init__(self, base_url: str, api_key: Optional[str] = None):
        """Create the underlying HTTP client.

        Args:
            base_url: Root URL of the API; trailing slashes are stripped.
            api_key: Optional token. When given (and non-empty) it is sent as
                ``Authorization: Bearer <api_key>`` on every request.
        """
        self.base_url = base_url.rstrip('/')
        self.api_key = api_key

        headers = {
            'User-Agent': 'MyAPIClient/1.0',
            'Accept': 'application/json',
        }
        if api_key:
            headers['Authorization'] = f'Bearer {api_key}'

        # Pass the normalised URL so the attribute and the client agree
        self.client = httpx.Client(
            base_url=self.base_url,
            timeout=30.0,
            headers=headers,
        )

    def get_user(self, user_id: str) -> Dict[str, Any]:
        """Fetch one user.

        Returns:
            The decoded JSON body of ``GET /users/{user_id}``.

        Raises:
            Whatever ``raise_for_status()`` raises for a non-success status.
        """
        response = self.client.get(f'/users/{user_id}')
        response.raise_for_status()
        return response.json()

    def create_user(self, user_data: Dict[str, Any]) -> Dict[str, Any]:
        """Create a user by POSTing ``user_data`` as JSON to ``/users``.

        Returns:
            The decoded JSON body of the response.

        Raises:
            Whatever ``raise_for_status()`` raises for a non-success status.
        """
        response = self.client.post('/users', json=user_data)
        response.raise_for_status()
        return response.json()

    def update_user(self, user_id: str, user_data: Dict[str, Any]) -> Dict[str, Any]:
        """Replace a user by PUTting ``user_data`` as JSON to ``/users/{user_id}``.

        Returns:
            The decoded JSON body of the response.

        Raises:
            Whatever ``raise_for_status()`` raises for a non-success status.
        """
        response = self.client.put(f'/users/{user_id}', json=user_data)
        response.raise_for_status()
        return response.json()

    def delete_user(self, user_id: str) -> bool:
        """Delete a user via ``DELETE /users/{user_id}``.

        Returns:
            True for any 2xx response, False otherwise. Servers may answer a
            successful DELETE with 200, 202 or 204, so checking for 204 alone
            would misreport some successful deletions as failures.
        """
        response = self.client.delete(f'/users/{user_id}')
        return response.is_success

    def close(self):
        """Close the underlying HTTP client."""
        self.client.close()

    def __enter__(self):
        """Return this instance for use as the ``with`` target."""
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Close the client on exit; exceptions from the block are not suppressed."""
        self.close()

# 使用示例
with APIClient('https://api.example.com', 'your_api_key') as client:
    # 创建用户
    new_user = client.create_user({
        'name': '张三',
        'email': 'zhangsan@example.com'
    })
    
    # 获取用户
    user = client.get_user(new_user['id'])
    print(f"用户信息: {user}")

8.2 并发请求示例

import asyncio
import httpx
from typing import List

async def fetch_url(client: httpx.AsyncClient, url: str) -> dict:
    """Fetch ``url`` with ``client`` and summarise the outcome as a dict.

    On success the dict holds ``url``, ``status``, ``content_length`` and
    ``success=True``. Any exception raised while fetching is turned into a
    dict with ``url``, ``error`` (the exception text) and ``success=False``.
    """
    try:
        reply = await client.get(url)
        reply.raise_for_status()
        outcome = dict(
            url=url,
            status=reply.status_code,
            content_length=len(reply.content),
            success=True,
        )
    except Exception as exc:
        outcome = dict(url=url, error=str(exc), success=False)
    return outcome

async def fetch_multiple_urls(urls: List[str]) -> List[dict]:
    """Fetch every URL in ``urls`` concurrently through one shared client.

    Returns the result dicts produced by ``fetch_url``, in the same order
    as ``urls``.
    """
    async with httpx.AsyncClient(timeout=10.0) as client:
        pending = (fetch_url(client, target) for target in urls)
        return await asyncio.gather(*pending)

async def main():
    """Fetch a fixed list of httpbin URLs concurrently and print one line per result."""
    # URLs to request
    targets = [
        'https://httpbin.org/get',
        'https://httpbin.org/post',
        'https://httpbin.org/status/200',
        'https://httpbin.org/status/404',
        'https://httpbin.org/delay/2',
    ]

    print("开始并发请求...")
    outcomes = await fetch_multiple_urls(targets)

    for outcome in outcomes:
        # Failures carry an 'error' entry instead of status and size
        if not outcome['success']:
            print(f"✗ {outcome['url']}: 错误 {outcome['error']}")
            continue
        print(f"✓ {outcome['url']}: 状态码 {outcome['status']}, 大小 {outcome['content_length']} 字节")

# 运行
asyncio.run(main())

九、最佳实践和注意事项

9.1 最佳实践

  1. 总是使用上下文管理器(with语句)来管理Client的生命周期
  2. 设置合理的超时,避免请求无限期等待
  3. 使用连接池提高性能
  4. 处理异常,特别是网络相关的异常
  5. 验证响应状态码,使用response.raise_for_status()
  6. 为生产环境配置重试机制(httpx.HTTPTransport(retries=N) 只会重试连接失败;按状态码或超时重试需要自行实现,或借助 tenacity 等库)

9.2 常见问题

# 问题1:忘记关闭Client(会造成资源泄漏)
# 错误写法
client = httpx.Client()
response = client.get(...)
# 忘记调用 client.close()

# 正确写法
with httpx.Client() as client:
    response = client.get(...)
# 自动关闭

# 问题2:没有处理异常
# 错误写法
response = httpx.get('https://example.com')
data = response.json()  # 如果响应不是JSON会报错

# Correct: handle transport errors, error status codes and invalid JSON.
# httpx.HTTPError is the common base class of RequestError (network
# failures) and HTTPStatusError (raised by raise_for_status()); catching
# only RequestError would let 4xx/5xx responses escape this handler.
try:
    response = httpx.get('https://example.com')
    response.raise_for_status()
    data = response.json() if response.content else None
except (httpx.HTTPError, ValueError) as e:
    print(f"请求失败: {e}")
    data = None

# 问题3:同步代码中使用异步Client
# 错误写法(在同步函数中使用await)
def sync_function():
    async with httpx.AsyncClient() as client:  # 错误!
        response = await client.get(...)

# 正确写法:要么全同步,要么全异步

9.3 性能优化

import httpx
import time

# Reuse a Client (important!)
# perf_counter() is a monotonic, high-resolution clock meant for measuring
# intervals; time.time() can jump if the system clock is adjusted.
start = time.perf_counter()

# Slow: every top-level httpx.get() call opens a new connection
for _ in range(10):
    response = httpx.get('https://httpbin.org/get')

print(f"不用Client: {time.perf_counter() - start:.2f}秒")

start = time.perf_counter()

# Fast: one Client reuses its connections across requests
with httpx.Client() as client:
    for _ in range(10):
        response = client.get('https://httpbin.org/get')

print(f"使用Client: {time.perf_counter() - start:.2f}秒")

十、总结

httpx 是一个功能强大、现代化的HTTP客户端库,它结合了 requests 的易用性和 aiohttp 的异步能力。通过学习本教程,你应该能够:

  1. 使用 httpx 发送各种HTTP请求
  2. 处理请求参数、头部、Cookies和认证
  3. 使用同步和异步两种模式
  4. 处理响应和错误
  5. 配置代理、超时和其他高级选项
  6. 编写高效的HTTP客户端代码

下一步学习建议:

  1. 学习 httpx 的HTTP/2 支持
  2. 探索基于 httpx 的第三方 WebSocket 客户端(如 httpx-ws,httpx 本身不提供 WebSocket)
  3. 学习事件钩子(event hooks)和自定义 Transport(httpx 没有"中间件"概念,这两者是对应的扩展机制)
  4. 了解连接池的详细配置
  5. 研究如何与FastAPI等框架集成

参考资源:

posted @ 2026-01-30 09:37  一亩食堂  阅读(0)  评论(0)    收藏  举报