使用 mitmproxy 将流量录制到 CSV 并从 CSV 回放的完整解决方案

下面是一个完整的系统,包含两个脚本:

  1. mitmproxy_recorder.py - 录制流量到 CSV 文件

  2. mitmproxy_replayer.py - 从 CSV 文件回放流量并记录结果

第一部分:流量录制脚本 (mitmproxy_recorder.py)

python
复制
import csv
import time
import os
import json
from datetime import datetime
from mitmproxy import http, ctx
from urllib.parse import urlparse, parse_qs

class TrafficRecorder:
    """Append every completed HTTP exchange seen by the proxy to CSV files.

    Constructing the recorder creates ``mitmproxy_records/`` (if needed) and
    two timestamp-named CSV files inside it: a detailed record file holding
    headers, bodies and timing, and a compact summary file.  One row is
    appended to each file per response.

    Bodies are decoded as UTF-8 with ``errors='replace'``, so binary payloads
    are stored lossily.
    """

    def __init__(self):
        self.output_dir = "mitmproxy_records"
        os.makedirs(self.output_dir, exist_ok=True)

        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        self.record_file = os.path.join(self.output_dir, f"record_{timestamp}.csv")
        self.summary_file = os.path.join(self.output_dir, f"summary_{timestamp}.csv")

        # Column order of the detailed record file.
        self.record_headers = [
            "timestamp", "request_method", "request_url",
            "request_headers", "request_params", "request_body",
            "response_status", "response_headers", "response_body",
            "response_time_ms", "content_type"
        ]

        # Column order of the summary file.
        self.summary_headers = [
            "timestamp", "request_url", "request_method",
            "response_status", "response_time_ms", "success"
        ]

        # Start both files with their header row.
        with open(self.record_file, 'w', newline='', encoding='utf-8') as f:
            writer = csv.writer(f)
            writer.writerow(self.record_headers)

        with open(self.summary_file, 'w', newline='', encoding='utf-8') as f:
            writer = csv.writer(f)
            writer.writerow(self.summary_headers)

        ctx.log.info(f"Recording traffic to {self.record_file}")
        ctx.log.info(f"Summary will be saved to {self.summary_file}")

    def _get_params(self, url):
        """Return the query parameters of *url* as a ``{name: [values]}`` dict."""
        parsed = urlparse(url)
        # keep_blank_values: without it "?a=&b=1" would be recorded without
        # "a", although the parameter was really sent.
        return parse_qs(parsed.query, keep_blank_values=True)

    def _format_headers(self, headers):
        """Serialize *headers* to a JSON object string."""
        return json.dumps(dict(headers))

    def request(self, flow: "http.HTTPFlow"):
        """Stamp the flow with the wall-clock time at which the request was seen."""
        flow.start_time = time.time()

    def response(self, flow: "http.HTTPFlow"):
        """Append one detail row and one summary row for the finished exchange.

        Any exception is logged and swallowed so that a recording problem
        never breaks the proxied traffic itself.
        """
        try:
            # The request hook may not have run for this flow (e.g. it was
            # already in flight when the script was loaded); fall back to the
            # request's own start timestamp instead of losing the row.
            started = getattr(flow, "start_time", None)
            if started is None:
                started = flow.request.timestamp_start
            response_time = (time.time() - started) * 1000

            # One timestamp for both files so the rows can be joined on it.
            recorded_at = datetime.now().isoformat()
            status_code = flow.response.status_code
            elapsed_ms = f"{response_time:.2f}"

            # Detailed record.
            record_row = [
                recorded_at,
                flow.request.method,
                flow.request.url,
                self._format_headers(flow.request.headers),
                json.dumps(self._get_params(flow.request.url)),
                flow.request.content.decode('utf-8', errors='replace') if flow.request.content else "",
                status_code,
                self._format_headers(flow.response.headers),
                flow.response.content.decode('utf-8', errors='replace') if flow.response.content else "",
                elapsed_ms,
                flow.response.headers.get("Content-Type", "")
            ]

            # Summary record; any status below 400 counts as a success.
            summary_row = [
                recorded_at,
                flow.request.url,
                flow.request.method,
                status_code,
                elapsed_ms,
                status_code < 400
            ]

            # Files are reopened per row so every row is on disk as soon as
            # the exchange completes.
            with open(self.record_file, 'a', newline='', encoding='utf-8') as f:
                writer = csv.writer(f)
                writer.writerow(record_row)

            with open(self.summary_file, 'a', newline='', encoding='utf-8') as f:
                writer = csv.writer(f)
                writer.writerow(summary_row)

        except Exception as e:
            ctx.log.error(f"Error recording request: {e}")

# Single module-level recorder shared by the two functions below.
# Constructing it creates the output directory and both CSV files.
recorder = TrafficRecorder()

def request(flow: http.HTTPFlow) -> None:
    """Delegate to ``recorder.request`` for the given flow.

    Presumably discovered by name as the request hook when the script is
    loaded with ``mitmproxy -s`` -- confirm against the mitmproxy version used.
    """
    recorder.request(flow)

def response(flow: http.HTTPFlow) -> None:
    """Delegate to ``recorder.response``, which writes the CSV rows for the flow."""
    recorder.response(flow)

第二部分:流量回放脚本 (mitmproxy_replayer.py)

python
复制
import csv
import time
import os
import json
import requests
from datetime import datetime
from urllib.parse import parse_qs

class TrafficReplayer:
    """Re-send HTTP requests recorded in a CSV file and log each outcome.

    The input rows must provide the columns ``timestamp``,
    ``request_method``, ``request_url``, ``request_headers`` (a JSON object),
    ``request_body`` and ``response_status``.  One result row per input row
    is appended to the output CSV; a row that cannot be replayed is logged
    with ``success`` False and the error text instead of aborting the run.

    Args:
        input_csv: Path of the CSV file containing the recorded traffic.
        output_csv: Path of the CSV file that receives the replay results.
            It is created (or truncated) immediately.
        timeout: Seconds to wait for each replayed request before it is
            reported as failed.
    """

    # Recorded bodies easily exceed the csv module's default field limit of
    # 131072 characters.  This bound still fits in a C long on every
    # platform, so csv.field_size_limit() accepts it everywhere.
    _CSV_FIELD_LIMIT = 2**31 - 1

    def __init__(self, input_csv, output_csv, timeout=30.0):
        self.input_csv = input_csv
        self.output_csv = output_csv
        self.timeout = timeout

        # Column order of the result file.
        self.output_headers = [
            "replay_timestamp", "original_timestamp", "request_url",
            "request_method", "response_status", "response_time_ms",
            "success", "error_message", "status_match", "response_diff"
        ]

        with open(self.output_csv, 'w', newline='', encoding='utf-8') as f:
            writer = csv.writer(f)
            writer.writerow(self.output_headers)

        print(f"Replaying from {self.input_csv}, saving results to {self.output_csv}")

    def _build_request_kwargs(self, row):
        """Translate one recorded CSV row into ``requests.request`` keyword arguments."""
        body = row['request_body']
        return {
            "method": row['request_method'],
            # The recorded URL already carries its query string.  Passing the
            # recorded parameters again via ``params=`` would append every
            # one of them a second time, so they are deliberately not sent.
            "url": row['request_url'],
            "headers": json.loads(row['request_headers']),
            # The body was stored as UTF-8 decoded text; send the same bytes
            # back rather than leaving the encoding to the HTTP stack.
            "data": body.encode('utf-8') if body else None,
            "timeout": self.timeout,
            # Every recorded row is a single exchange, so a recorded 3xx must
            # be compared with the 3xx itself, not with the redirect target.
            "allow_redirects": False,
            # NOTE(review): certificate verification is disabled; only replay
            # against hosts you trust.
            "verify": False,
        }

    def _replay_row(self, row):
        """Replay one recorded row and return the matching result row."""
        try:
            request_kwargs = self._build_request_kwargs(row)
            original_status = int(row['response_status'])
            # Time only the network round trip, not the row parsing above.
            started = time.perf_counter()
            response = requests.request(**request_kwargs)
            response_time = (time.perf_counter() - started) * 1000
        except Exception as e:
            # Best effort by design: a malformed row or a failed request is
            # logged and the replay continues.  ``get`` keeps this branch
            # from raising when the row itself lacks a column.
            return [
                datetime.now().isoformat(),
                row.get('timestamp'),
                row.get('request_url'),
                row.get('request_method'),
                "0",
                "0",
                False,
                str(e),
                False,
                "Error"
            ]

        return [
            datetime.now().isoformat(),
            row.get('timestamp'),
            request_kwargs["url"],
            request_kwargs["method"],
            response.status_code,
            f"{response_time:.2f}",
            response.ok,
            "",
            original_status == response.status_code,
            "N/A"  # body comparison is not implemented; placeholder value
        ]

    def replay(self):
        """Replay every row of the input CSV, appending one result row each."""
        if csv.field_size_limit() < self._CSV_FIELD_LIMIT:
            csv.field_size_limit(self._CSV_FIELD_LIMIT)

        # newline='' lets the csv module handle line breaks embedded in
        # quoted fields (recorded bodies) without translating them.
        with open(self.input_csv, 'r', newline='', encoding='utf-8') as source:
            with open(self.output_csv, 'a', newline='', encoding='utf-8') as sink:
                writer = csv.writer(sink)
                for row in csv.DictReader(source):
                    writer.writerow(self._replay_row(row))
                    # Keep finished results on disk if the run is interrupted.
                    sink.flush()

if __name__ == "__main__":
    # Command-line entry point: two positional paths, then run the replay.
    from argparse import ArgumentParser

    cli = ArgumentParser(description='Replay recorded HTTP traffic from CSV')
    positional_arguments = (
        ('input', 'Input CSV file containing recorded traffic'),
        ('output', 'Output CSV file for replay results'),
    )
    for argument_name, argument_help in positional_arguments:
        cli.add_argument(argument_name, help=argument_help)

    options = cli.parse_args()

    TrafficReplayer(options.input, options.output).replay()

使用说明

1. 录制流量

bash
复制
mitmproxy -s mitmproxy_recorder.py

脚本加载时即会在 mitmproxy_records 目录下创建两个文件,并在录制过程中逐条追加记录:

  • record_<timestamp>.csv - 包含完整的请求/响应详细信息

  • summary_<timestamp>.csv - 包含简化的摘要信息

2. 回放流量

bash
复制
python mitmproxy_replayer.py mitmproxy_records/record_20230101_120000.csv replay_results.csv

回放结果将保存到 replay_results.csv,包含以下信息:

  • 回放时间戳

  • 原始记录时间戳

  • 请求URL和方法

  • 响应状态码和时间

  • 是否成功

  • 错误信息(如果有)

  • 状态码是否匹配原始记录

  • 响应体差异(当前脚本固定输出占位值 "N/A",尚未做实际比较;可参考下文"增强比较功能")

高级功能扩展

  1. 添加过滤功能

    • 在录制脚本中添加域名/URL过滤

    • 在回放脚本中添加选择性回放

  2. 增强比较功能

    python
    复制
    # 在replayer.py中添加更详细的响应比较
    def compare_responses(original, new):
        """Compare a recorded response with a freshly replayed one.

        Args:
            original: Mapping with the keys ``'status'`` (int or numeric
                string), ``'headers'`` (mapping of name to value) and
                ``'body'`` (text).
            new: Replayed response exposing ``status_code``, ``headers`` and
                ``text``.

        Returns:
            A dict with three keys: ``'status_match'`` (bool);
            ``'headers_diff'``, the set of ``(lower-cased name, value)``
            pairs present in the original but not in the replayed response;
            and ``'body_diff'``, an empty string when the bodies are equal,
            otherwise a unified diff of their lines.
        """
        import difflib

        # Values read back from CSV are strings; coerce so that "200" == 200.
        status_match = int(original['status']) == new.status_code

        # Header names are case-insensitive, so compare them lower-cased.
        recorded_headers = {(str(name).lower(), value) for name, value in original['headers'].items()}
        replayed_headers = {(str(name).lower(), value) for name, value in new.headers.items()}
        headers_diff = recorded_headers - replayed_headers

        if original['body'] == new.text:
            body_diff = ""
        else:
            diff_lines = difflib.unified_diff(
                original['body'].splitlines(),
                new.text.splitlines(),
                fromfile="original",
                tofile="replayed",
                lineterm="",
            )
            # Bodies that differ only in line endings produce no diff lines;
            # fall back to the generic marker so the difference still shows.
            body_diff = "\n".join(diff_lines) or "..."

        return {
            'status_match': status_match,
            'headers_diff': headers_diff,
            'body_diff': body_diff
        }
  3. 添加性能统计

    python
    复制
    # 在replayer.py结束时添加统计信息
    def print_stats(self):
        """Print the request count, success rate and mean response time.

        Reads the result rows back from ``self.output_csv``.  The average
        covers every row, including failed ones, whose time is logged as 0.
        When there are no result rows only the count is printed, because the
        ratios would otherwise divide by zero.
        """
        # Same encoding/newline settings the result file is written with.
        with open(self.output_csv, 'r', newline='', encoding='utf-8') as f:
            results = list(csv.DictReader(f))

        total = len(results)

        print("\nReplay Summary:")
        print(f"Total requests: {total}")
        if total == 0:
            return

        # Booleans are stored by the csv writer as the text "True"/"False".
        success = sum(1 for r in results if r['success'] == 'True')
        avg_time = sum(float(r['response_time_ms']) for r in results) / total

        print(f"Success rate: {success/total:.2%}")
        print(f"Average response time: {avg_time:.2f}ms")
  4. 支持变量参数化

    python
    复制
    # 在回放前替换动态值
    def prepare_request(self, row):
        """Return the row's request URL with ``${timestamp}`` filled in.

        Every occurrence of the literal placeholder ``${timestamp}`` is
        replaced by the current Unix time in whole seconds.
        """
        recorded_url = row['request_url']
        epoch_seconds = int(time.time())
        return recorded_url.replace('${timestamp}', f"{epoch_seconds}")

这个完整的解决方案可以满足流量录制、回放和结果分析的需求,适用于API测试、性能基准测试和回归测试等场景。

posted @ 2025-03-28 01:31  小强找BUG  阅读(75)  评论(0)    收藏  举报