根据mysqlbinlog 解析binlog,并使用解析后的数据,分析获取需要的各种指标

数据库版本:mysql5.7.23 未开启gtid 

1、mysqlbinlog 分析

mysqlbinlog --base64-output=decode-rows -vv --start-datetime='2025-10-28 06:00:00' --stop-datetime='2025-10-28 06:59:59' 3306-bin.000680 > 000680_01.sh

2、Python解析

import re
import datetime
import sys
import argparse
from collections import defaultdict
from pathlib import Path

class BinlogAnalyzer:
    def __init__(self):
        self.transactions = []
        self.current_transaction = None
        self.last_timestamp = None
        self.last_position = 0
        
    def parse_file(self, file_path):
        """解析binlog文件"""
        try:
            with open(file_path, 'r', encoding='utf-8') as f:
                lines = f.readlines()
        except FileNotFoundError:
            print(f"错误: 文件 {file_path} 不存在")
            return False
        except Exception as e:
            print(f"读取文件时出错: {e}")
            return False
            
        print(f"正在解析文件: {file_path}")
        print(f"文件行数: {len(lines)}")
        
        transaction_count = 0
        for i, line in enumerate(lines):
            line = line.strip()
            
            # 解析位置和时间戳信息
            self._parse_position_and_timestamp(line)
            
            # 检测事务开始
            if line == 'BEGIN' and self.current_transaction is None:
                self.current_transaction = {
                    'start_pos': self.last_position,
                    'start_time': self.last_timestamp,
                    'events': [],
                    'dml_operations': defaultdict(int),
                    'tables_affected': set(),
                    'transaction_id': transaction_count + 1
                }
                
            # 检测事务提交
            elif line.startswith('COMMIT') and self.current_transaction:
                self.current_transaction['end_pos'] = self.last_position
                self.current_transaction['end_time'] = self.last_timestamp
                self._finalize_transaction()
                transaction_count += 1
                
            # 检测DML操作
            elif line.startswith('###') and self.current_transaction:
                self._parse_dml_operation(line)
                
        print(f"解析完成,找到 {transaction_count} 个事务")
        return True
        
    def _parse_position_and_timestamp(self, line):
        """解析位置和时间戳信息"""
        # 解析位置信息
        if line.startswith('# at '):
            try:
                self.last_position = int(line.split()[2])
            except (IndexError, ValueError):
                pass
                
        # 解析时间戳信息
        if line.startswith('#') and 'server id' in line and 'end_log_pos' in line:
            parts = line.split()
            if len(parts) >= 2:
                date_part = parts[0][1:]  # 移除开头的#
                time_part = parts[1]
                try:
                    # 修复:添加年份前缀,假设为20xx年
                    # 从 "251028 06:00:00" 转换为 "2025-10-28 06:00:00"
                    year_prefix = "20" if date_part[:2] <= "50" else "19"
                    full_date = f"{year_prefix}{date_part[:2]}-{date_part[2:4]}-{date_part[4:6]}"
                    datetime_str = f"{full_date} {time_part}"
                    self.last_timestamp = datetime.datetime.strptime(datetime_str, "%Y-%m-%d %H:%M:%S")
                except ValueError as e:
                    print(f"时间解析错误: {datetime_str}, 错误: {e}")
                    pass
        
    def _parse_dml_operation(self, line):
        """解析DML操作"""
        # 更精确地检测DML操作类型
        if 'INSERT INTO' in line:
            op_type = 'INSERT'
        elif 'UPDATE' in line and not line.startswith('### WHERE') and not line.startswith('### SET'):
            # UPDATE语句的第一行包含表名
            op_type = 'UPDATE'
        elif 'DELETE FROM' in line:
            op_type = 'DELETE'
        else:
            return
            
        # 提取表名 - 针对不同的DML语句使用不同的正则表达式
        full_table_name = None
        
        # INSERT语句: INSERT INTO `db`.`table`
        if op_type == 'INSERT':
            table_match = re.search(r'INTO `([^`]+)`\.`([^`]+)`', line)
            if table_match:
                database = table_match.group(1)
                table = table_match.group(2)
                full_table_name = f"{database}.{table}"
        
        # UPDATE语句: UPDATE `db`.`table`
        elif op_type == 'UPDATE':
            table_match = re.search(r'UPDATE `([^`]+)`\.`([^`]+)`', line)
            if table_match:
                database = table_match.group(1)
                table = table_match.group(2)
                full_table_name = f"{database}.{table}"
        
        # DELETE语句: DELETE FROM `db`.`table`
        elif op_type == 'DELETE':
            table_match = re.search(r'FROM `([^`]+)`\.`([^`]+)`', line)
            if table_match:
                database = table_match.group(1)
                table = table_match.group(2)
                full_table_name = f"{database}.{table}"
        
        if full_table_name:
            self.current_transaction['events'].append({
                'type': op_type,
                'table': full_table_name
            })
            
            self.current_transaction['dml_operations'][op_type] += 1
            self.current_transaction['tables_affected'].add(full_table_name)
            
    def _finalize_transaction(self):
        """完成事务处理"""
        if (self.current_transaction and 
            self.current_transaction.get('start_time') and 
            self.current_transaction.get('end_time') and
            self.current_transaction.get('start_pos') is not None and
            self.current_transaction.get('end_pos') is not None):
            
            # 计算事务大小和时长
            start_pos = self.current_transaction['start_pos']
            end_pos = self.current_transaction['end_pos']
            
            # 确保大小为正数
            self.current_transaction['size'] = abs(end_pos - start_pos)
            
            start_time = self.current_transaction['start_time']
            end_time = self.current_transaction['end_time']
            
            # 计算毫秒级时长
            duration_ms = (end_time - start_time).total_seconds() * 1000
            self.current_transaction['duration_ms'] = max(duration_ms, 0)  # 确保非负
            
            self.current_transaction['event_count'] = len(self.current_transaction['events'])
            
            self.transactions.append(self.current_transaction)
        
        self.current_transaction = None
        
    def generate_statistics(self):
        """生成统计报告"""
        if not self.transactions:
            return {
                'error': '没有找到事务数据',
                'total_transactions': 0
            }
            
        stats = {}
        
        # 1. 统计每个事务的大小的TOP10 - 同时包含时长和事件数
        stats['top10_transaction_sizes'] = sorted(
            [(t['size'], t['transaction_id'], t['duration_ms'], t['event_count']) for t in self.transactions],
            key=lambda x: x[0], reverse=True
        )[:10]
        
        # 2. 统计每个事务时长的TOP10 (毫秒) - 同时包含大小和事件数
        stats['top10_transaction_durations'] = sorted(
            [(t['duration_ms'], t['transaction_id'], t['size'], t['event_count']) for t in self.transactions],
            key=lambda x: x[0], reverse=True
        )[:10]
        
        # 3. 统计每个事务event的TOP10 - 同时包含大小和时长
        stats['top10_transaction_events'] = sorted(
            [(t['event_count'], t['transaction_id'], t['size'], t['duration_ms']) for t in self.transactions],
            key=lambda x: x[0], reverse=True
        )[:10]
        
        # 4. 统计所有事务的个数、总时长、总大小
        stats['total_transactions'] = len(self.transactions)
        stats['total_duration_ms'] = sum(t['duration_ms'] for t in self.transactions)
        stats['total_size'] = sum(t['size'] for t in self.transactions)
        stats['avg_duration_ms'] = stats['total_duration_ms'] / stats['total_transactions'] if stats['total_transactions'] > 0 else 0
        stats['avg_size'] = stats['total_size'] / stats['total_transactions'] if stats['total_transactions'] > 0 else 0
        
        # 修复:统计总事件数
        total_events = sum(t['event_count'] for t in self.transactions)
        stats['total_events'] = total_events
        stats['avg_events'] = total_events / stats['total_transactions'] if stats['total_transactions'] > 0 else 0
        
        # 5. 修复DML操作统计 - 正确统计每个DML类型的事务数
        dml_stats = defaultdict(lambda: {
            'events_count': 0,
            'transactions_set': set(),  # 使用集合避免重复计数
            'total_size': 0,
            'total_duration_ms': 0
        })
        
        # 修复:为每个事务计算DML统计
        for transaction in self.transactions:
            # 获取该事务中每种DML操作的事件数
            transaction_dml_ops = transaction['dml_operations']
            
            for op_type, event_count in transaction_dml_ops.items():
                if event_count > 0:
                    dml_stats[op_type]['events_count'] += event_count
                    dml_stats[op_type]['transactions_set'].add(transaction['transaction_id'])
                    
                    # 修复:按比例分配事务的size和duration到各个DML类型
                    total_events_in_trans = transaction['event_count']
                    if total_events_in_trans > 0:
                        ratio = event_count / total_events_in_trans
                        dml_stats[op_type]['total_size'] += transaction['size'] * ratio
                        dml_stats[op_type]['total_duration_ms'] += transaction['duration_ms'] * ratio
        
        # 修复:验证DML事务数统计
        dml_transaction_ids = set()
        for op_stats in dml_stats.values():
            dml_transaction_ids.update(op_stats['transactions_set'])
        stats['dml_total_transactions'] = len(dml_transaction_ids)
        
        # 转换集合为计数
        for op_type in dml_stats:
            dml_stats[op_type]['transactions_count'] = len(dml_stats[op_type]['transactions_set'])
            del dml_stats[op_type]['transactions_set']  # 删除原始集合
                
        stats['dml_operations'] = dict(dml_stats)
        
        # 6. 统计DDL操作的个数
        stats['ddl_operations'] = {
            'CREATE': 0,
            'ALTER': 0, 
            'DROP': 0,
            'TRUNCATE': 0
        }
        
        # 7. 统计表DML操作的TOP10 - 同时统计事件次数和事务次数
        table_operations = defaultdict(lambda: {
            'INSERT': {'events_count': 0, 'transactions_count': set()},
            'UPDATE': {'events_count': 0, 'transactions_count': set()}, 
            'DELETE': {'events_count': 0, 'transactions_count': set()}
        })
        
        for transaction in self.transactions:
            for event in transaction['events']:
                table = event['table']
                op_type = event['type']
                table_operations[table][op_type]['events_count'] += 1
                table_operations[table][op_type]['transactions_count'].add(transaction['transaction_id'])
                
        # 转换为更容易处理的格式
        stats['top10_insert_tables'] = sorted(
            [(table, ops['INSERT']['events_count'], len(ops['INSERT']['transactions_count'])) 
             for table, ops in table_operations.items() if ops['INSERT']['events_count'] > 0],
            key=lambda x: x[1], reverse=True
        )[:10]
        
        stats['top10_update_tables'] = sorted(
            [(table, ops['UPDATE']['events_count'], len(ops['UPDATE']['transactions_count'])) 
             for table, ops in table_operations.items() if ops['UPDATE']['events_count'] > 0],
            key=lambda x: x[1], reverse=True
        )[:10]
        
        stats['top10_delete_tables'] = sorted(
            [(table, ops['DELETE']['events_count'], len(ops['DELETE']['transactions_count'])) 
             for table, ops in table_operations.items() if ops['DELETE']['events_count'] > 0],
            key=lambda x: x[1], reverse=True
        )[:10]
        
        # 添加时间范围信息
        if self.transactions:
            start_times = [t['start_time'] for t in self.transactions]
            end_times = [t['end_time'] for t in self.transactions]
            stats['time_range'] = {
                'start': min(start_times),
                'end': max(end_times)
            }
            
            # 修复:计算实际时间跨度
            time_span = stats['time_range']['end'] - stats['time_range']['start']
            stats['time_span_seconds'] = time_span.total_seconds()
            stats['time_span_hours'] = time_span.total_seconds() / 3600
        
        return stats
        
    def print_console_report(self):
        """打印控制台统计报告"""
        stats = self.generate_statistics()
        
        if stats.get('error'):
            print(f"\n❌ {stats['error']}")
            return
        
        print("\n" + "=" * 80)
        print("🎯 MySQL Binlog 事务分析报告")
        print("=" * 80)
        
        # 时间范围
        if 'time_range' in stats:
            time_range = stats['time_range']
            print(f"📅 时间范围: {time_range['start']} 到 {time_range['end']}")
            print(f"⏳ 时间跨度: {stats['time_span_hours']:.2f} 小时 ({stats['time_span_seconds']:.0f} 秒)")
        
        # 1. 事务大小TOP10 - 补充时长和事件数
        print(f"\n🏆 事务大小 TOP10 (字节):")
        print("" * 70)
        if stats['top10_transaction_sizes']:
            for i, (size, trans_id, duration_ms, event_count) in enumerate(stats['top10_transaction_sizes'], 1):
                print(f"   #{i:2d} 事务 {trans_id:3d}: {size:>12,} bytes | 时长: {duration_ms:>8.3f} ms | 事件数: {event_count:>4}")
        else:
            print("   📝 无数据")
            
        # 2. 事务时长TOP10 (毫秒) - 补充大小和事件数
        print(f"\n⏱️  事务时长 TOP10 (毫秒):")
        print("" * 70)
        if stats['top10_transaction_durations']:
            for i, (duration_ms, trans_id, size, event_count) in enumerate(stats['top10_transaction_durations'], 1):
                print(f"   #{i:2d} 事务 {trans_id:3d}: {duration_ms:>12.3f} ms | 大小: {size:>10,} bytes | 事件数: {event_count:>4}")
        else:
            print("   📝 无数据")
            
        # 3. 事务事件数TOP10 - 补充大小和时长
        print(f"\n📊 事务事件数 TOP10:")
        print("" * 70)
        if stats['top10_transaction_events']:
            for i, (event_count, trans_id, size, duration_ms) in enumerate(stats['top10_transaction_events'], 1):
                print(f"   #{i:2d} 事务 {trans_id:3d}: {event_count:>8} 个事件 | 大小: {size:>10,} bytes | 时长: {duration_ms:>8.3f} ms")
        else:
            print("   📝 无数据")
            
        # 4. 总体统计
        print(f"\n📈 总体事务统计:")
        print("" * 50)
        print(f"   📦 事务总数: {stats['total_transactions']:>8}")
        print(f"   📊 总事件数: {stats['total_events']:>8}")
        print(f"   ⏱️  总时长: {stats['total_duration_ms']:>12.3f} ms")
        print(f"   💾 总大小: {stats['total_size']:>12,} bytes")
        print(f"   📊 平均时长: {stats['avg_duration_ms']:>12.3f} ms")
        print(f"   📏 平均大小: {stats['avg_size']:>12,.0f} bytes")
        print(f"   🔢 平均事件数: {stats['avg_events']:>10.1f}")
        
        # 5. DML操作统计
        print(f"\n🛠️  DML操作统计:")
        print("" * 50)
        if stats['dml_operations']:
            # 验证:DML事件总数应该等于总事件数
            dml_total_events = sum(op_stats['events_count'] for op_stats in stats['dml_operations'].values())
            print(f"   🔍 事件数验证: DML事件总数 = {dml_total_events}, 总事件数 = {stats['total_events']}")
            
            # 验证:DML事务数统计
            dml_trans_sum = sum(op_stats['transactions_count'] for op_stats in stats['dml_operations'].values())
            print(f"   🔍 事务数验证: DML事务数之和 = {dml_trans_sum}, 实际DML事务数 = {stats['dml_total_transactions']}")
            print(f"   💡 说明: 一个事务可能包含多种DML操作,因此DML事务数之和可能大于实际DML事务数")
            
            for op_type, op_stats in stats['dml_operations'].items():
                if op_stats['events_count'] > 0:
                    print(f"   {op_type:>6}:")
                    print(f"        事件数: {op_stats['events_count']:>8} ({op_stats['events_count']/stats['total_events']*100:>5.1f}%)")
                    print(f"        事务数: {op_stats['transactions_count']:>8} ({op_stats['transactions_count']/stats['dml_total_transactions']*100:>5.1f}%)")
                    print(f"        总大小: {op_stats['total_size']:>12,.0f} bytes")
                    print(f"        总时长: {op_stats['total_duration_ms']:>12.3f} ms")
        else:
            print("   📝 无DML操作")
                
        # 7. 表DML操作TOP10 - 同时显示事件次数和事务次数
        print(f"\n📥 INSERT操作 TOP10 表:")
        print("" * 50)
        if stats['top10_insert_tables']:
            for i, (table, event_count, trans_count) in enumerate(stats['top10_insert_tables'], 1):
                print(f"   #{i:2d} {table:<40} {event_count:>6} 次事件, {trans_count:>4} 个事务")
        else:
            print("   📝 无INSERT操作")
            
        print(f"\n📤 DELETE操作 TOP10 表:")
        print("" * 50)
        if stats['top10_delete_tables']:
            for i, (table, event_count, trans_count) in enumerate(stats['top10_delete_tables'], 1):
                print(f"   #{i:2d} {table:<40} {event_count:>6} 次事件, {trans_count:>4} 个事务")
        else:
            print("   📝 无DELETE操作")
            
        print(f"\n🔄 UPDATE操作 TOP10 表:")
        print("" * 50)
        if stats['top10_update_tables']:
            for i, (table, event_count, trans_count) in enumerate(stats['top10_update_tables'], 1):
                print(f"   #{i:2d} {table:<40} {event_count:>6} 次事件, {trans_count:>4} 个事务")
        else:
            print("   📝 无UPDATE操作")
            
        print("\n" + "=" * 80)
        print("✅ 分析完成")
        
    def generate_html_report(self, output_file=None):
        """生成HTML格式的报告"""
        stats = self.generate_statistics()
        
        if not output_file:
            input_file = Path(sys.argv[1] if len(sys.argv) > 1 else "unknown")
            output_file = f"{input_file.stem}_report.html"
        
        html_content = f"""
        <!DOCTYPE html>
        <html lang="zh-CN">
        <head>
            <meta charset="UTF-8">
            <meta name="viewport" content="width=device-width, initial-scale=1.0">
            <title>MySQL Binlog 分析报告</title>
            <style>
                body {{ font-family: Arial, sans-serif; margin: 20px; background-color: #f5f5f5; }}
                .container {{ max-width: 1200px; margin: 0 auto; background: white; padding: 20px; border-radius: 10px; box-shadow: 0 2px 10px rgba(0,0,0,0.1); }}
                .header {{ text-align: center; background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); color: white; padding: 20px; border-radius: 8px; margin-bottom: 30px; }}
                .section {{ margin-bottom: 30px; padding: 20px; border: 1px solid #e0e0e0; border-radius: 8px; background: #fafafa; }}
                .section-title {{ color: #333; border-bottom: 2px solid #667eea; padding-bottom: 10px; margin-bottom: 15px; }}
                table {{ width: 100%; border-collapse: collapse; margin-top: 10px; }}
                th, td {{ padding: 12px; text-align: left; border-bottom: 1px solid #ddd; }}
                th {{ background-color: #667eea; color: white; }}
                tr:nth-child(even) {{ background-color: #f8f9fa; }}
                tr:hover {{ background-color: #e9ecef; }}
                .metric {{ display: inline-block; margin: 10px; padding: 15px; background: white; border-radius: 8px; box-shadow: 0 2px 5px rgba(0,0,0,0.1); min-width: 150px; text-align: center; }}
                .metric-value {{ font-size: 24px; font-weight: bold; color: #667eea; }}
                .metric-label {{ font-size: 14px; color: #666; }}
                .top10-table {{ font-size: 14px; }}
                .timestamp {{ color: #666; font-size: 12px; text-align: right; }}
                .note {{ background: #fff3cd; border: 1px solid #ffeaa7; border-radius: 4px; padding: 10px; margin: 10px 0; }}
                .validation {{ background: #d1ecf1; border: 1px solid #bee5eb; border-radius: 4px; padding: 10px; margin: 10px 0; }}
                .warning {{ background: #f8d7da; border: 1px solid #f5c6cb; border-radius: 4px; padding: 10px; margin: 10px 0; }}
            </style>
        </head>
        <body>
            <div class="container">
                <div class="header">
                    <h1>📊 MySQL Binlog 事务分析报告</h1>
                    <p>生成时间: {datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')}</p>
                </div>
        """
        
        if stats.get('error'):
            html_content += f"""
                <div class="section">
                    <h2 class="section-title">❌ 错误</h2>
                    <p>{stats['error']}</p>
                </div>
            """
        else:
            # 总体指标
            html_content += """
                <div class="section">
                    <h2 class="section-title">📈 总体指标</h2>
                    <div style="text-align: center;">
            """
            
            metrics = [
                (stats['total_transactions'], '事务总数'),
                (stats['total_events'], '总事件数'),
                (f"{stats['total_duration_ms']:.2f}ms", '总时长'),
                (f"{stats['total_size']:,}", '总大小(bytes)'),
                (f"{stats['avg_duration_ms']:.2f}ms", '平均时长'),
                (f"{stats['avg_size']:,.0f}", '平均大小(bytes)'),
                (f"{stats['avg_events']:.1f}", '平均事件数')
            ]
            
            for value, label in metrics:
                html_content += f"""
                    <div class="metric">
                        <div class="metric-value">{value}</div>
                        <div class="metric-label">{label}</div>
                    </div>
                """
            
            html_content += """
                    </div>
                </div>
            """
            
            # 时间范围
            if 'time_range' in stats:
                time_range = stats['time_range']
                html_content += f"""
                <div class="section">
                    <h2 class="section-title">📅 时间范围</h2>
                    <p><strong>开始:</strong> {time_range['start']}</p>
                    <p><strong>结束:</strong> {time_range['end']}</p>
                    <p><strong>时间跨度:</strong> {stats['time_span_hours']:.2f} 小时 ({stats['time_span_seconds']:.0f} 秒)</p>
                </div>
                """
            
            # TOP10 表格
            def create_top10_table(title, data, primary_field, secondary_fields):
                if not data:
                    return f"<p>无数据</p>"
                
                table_html = f"""
                <table class="top10-table">
                    <tr>
                        <th>排名</th>
                        <th>事务ID</th>
                        <th>{primary_field}</th>
                """
                
                for field in secondary_fields:
                    table_html += f"<th>{field}</th>"
                
                table_html += "</tr>"
                
                for i, item in enumerate(data, 1):
                    table_html += f"""
                    <tr>
                        <td>#{i}</td>
                        <td>事务 {item[1]}</td>
                        <td>{item[0]}</td>
                    """
                    
                    for j in range(2, len(item)):
                        if "bytes" in secondary_fields[j-2].lower():
                            table_html += f"<td>{item[j]:,}</td>"
                        elif "ms" in secondary_fields[j-2].lower():
                            table_html += f"<td>{item[j]:.3f}</td>"
                        else:
                            table_html += f"<td>{item[j]}</td>"
                    
                    table_html += "</tr>"
                
                table_html += "</table>"
                return table_html
            
            html_content += f"""
            <div class="section">
                <h2 class="section-title">🏆 TOP10 排名</h2>
                <div style="display: grid; grid-template-columns: repeat(auto-fit, minmax(400px, 1fr)); gap: 20px;">
                    <div>
                        <h3>💾 事务大小</h3>
                        {create_top10_table("事务大小", stats['top10_transaction_sizes'], "大小(bytes)", ["时长(ms)", "事件数"])}
                    </div>
                    <div>
                        <h3>⏱️ 事务时长</h3>
                        {create_top10_table("事务时长", stats['top10_transaction_durations'], "时长(ms)", ["大小(bytes)", "事件数"])}
                    </div>
                    <div>
                        <h3>📊 事务事件数</h3>
                        {create_top10_table("事务事件数", stats['top10_transaction_events'], "事件数", ["大小(bytes)", "时长(ms)"])}
                    </div>
                </div>
            </div>
            """
            
            # DML操作统计
            if stats['dml_operations']:
                # 添加验证信息
                dml_total_events = sum(op_stats['events_count'] for op_stats in stats['dml_operations'].values())
                dml_trans_sum = sum(op_stats['transactions_count'] for op_stats in stats['dml_operations'].values())
                
                html_content += f"""
                <div class="section">
                    <h2 class="section-title">🛠️ DML操作统计</h2>
                    <div class="validation">
                        <strong>数据验证:</strong><br>
                        - 事件数: DML事件总数 = {dml_total_events}, 总事件数 = {stats['total_events']}, 
                        匹配状态: <strong>{'✅ 匹配' if dml_total_events == stats['total_events'] else '❌ 不匹配'}</strong><br>
                        - 事务数: DML事务数之和 = {dml_trans_sum}, 实际DML事务数 = {stats['dml_total_transactions']}
                    </div>
                    <div class="note">
                        <strong>注意:</strong>一个事务可能包含多种DML操作,因此每个DML类型统计的事务数可能会重复计算同一个事务,导致DML事务数之和大于实际DML事务数。
                    </div>
                    <table>
                        <tr>
                            <th>操作类型</th>
                            <th>事件数</th>
                            <th>占比</th>
                            <th>事务数</th>
                            <th>占比</th>
                            <th>总大小(bytes)</th>
                            <th>总时长(ms)</th>
                        </tr>
                """
                for op_type, op_stats in stats['dml_operations'].items():
                    if op_stats['events_count'] > 0:
                        event_percent = op_stats['events_count'] / stats['total_events'] * 100
                        trans_percent = op_stats['transactions_count'] / stats['dml_total_transactions'] * 100
                        html_content += f"""
                        <tr>
                            <td>{op_type}</td>
                            <td>{op_stats['events_count']}</td>
                            <td>{event_percent:.1f}%</td>
                            <td>{op_stats['transactions_count']}</td>
                            <td>{trans_percent:.1f}%</td>
                            <td>{op_stats['total_size']:,.0f}</td>
                            <td>{op_stats['total_duration_ms']:.2f}</td>
                        </tr>
                        """
                html_content += "</table></div>"
            
            # 表操作TOP10
            def create_table_top10(title, data):
                if not data:
                    return "<p>无数据</p>"
                
                table_html = f"""
                <table class="top10-table">
                    <tr>
                        <th>排名</th>
                        <th>表名</th>
                        <th>事件次数</th>
                        <th>事务次数</th>
                    </tr>
                """
                for i, (table, event_count, trans_count) in enumerate(data, 1):
                    table_html += f"""
                    <tr>
                        <td>#{i}</td>
                        <td>{table}</td>
                        <td>{event_count}</td>
                        <td>{trans_count}</td>
                    </tr>
                    """
                table_html += "</table>"
                return table_html
            
            html_content += f"""
            <div class="section">
                <h2 class="section-title">📋 表操作统计</h2>
                <div style="display: grid; grid-template-columns: repeat(auto-fit, minmax(300px, 1fr)); gap: 20px;">
                    <div>
                        <h3>📥 INSERT操作TOP10</h3>
                        {create_table_top10("INSERT", stats['top10_insert_tables'])}
                    </div>
                    <div>
                        <h3>📤 DELETE操作TOP10</h3>
                        {create_table_top10("DELETE", stats['top10_delete_tables'])}
                    </div>
                    <div>
                        <h3>🔄 UPDATE操作TOP10</h3>
                        {create_table_top10("UPDATE", stats['top10_update_tables'])}
                    </div>
                </div>
            </div>
            """
            
            # 添加说明
            html_content += """
            <div class="section">
                <h2 class="section-title">📝 说明</h2>
                <div class="note">
                    <p><strong>注意:</strong></p>
                    <ul>
                        <li>事务大小计算基于binlog中的位置差值的绝对值</li>
                        <li>事务时长计算基于事务开始和结束的时间戳差异</li>
                        <li>DML操作统计中的总大小和总时长按事件数比例分配到各个操作类型</li>
                        <li>表操作统计同时显示事件次数和涉及的事务次数</li>
                        <li>一个事务可能包含多种DML操作,因此DML操作的事务数之和可能大于实际DML事务数</li>
                        <li>DML事件总数应该等于总体统计中的总事件数</li>
                    </ul>
                </div>
            </div>
            """
        
        html_content += """
                <div class="timestamp">
                    <p>报告生成工具: MySQL Binlog Analyzer</p>
                </div>
            </div>
        </body>
        </html>
        """
        
        try:
            with open(output_file, 'w', encoding='utf-8') as f:
                f.write(html_content)
            print(f"✅ HTML报告已生成: {output_file}")
            return True
        except Exception as e:
            print(f"❌ 生成HTML报告失败: {e}")
            return False

def main():
    parser = argparse.ArgumentParser(description='MySQL Binlog事务分析工具')
    parser.add_argument('filename', help='要分析的binlog文件路径')
    parser.add_argument('--html', help='生成HTML报告文件路径', metavar='OUTPUT_FILE')
    parser.add_argument('--console', action='store_true', default=True, help='在控制台显示报告(默认)')
    parser.add_argument('--no-console', action='store_false', dest='console', help='不在控制台显示报告')
    
    args = parser.parse_args()
    
    if not Path(args.filename).exists():
        print(f"错误: 文件 {args.filename} 不存在")
        sys.exit(1)
    
    analyzer = BinlogAnalyzer()
    
    # 解析文件
    if not analyzer.parse_file(args.filename):
        sys.exit(1)
    
    # 生成报告
    if args.console:
        analyzer.print_console_report()
    
    if args.html:
        analyzer.generate_html_report(args.html)
    else:
        # 默认也生成HTML报告
        analyzer.generate_html_report()

if __name__ == "__main__":
    main()
View Code

3、执行效果

[root@dbtbj binlog]# python3 binay.py 000680_01.sh
正在解析文件: 000680_01.sh
文件行数: 318223
解析完成,找到 1159 个事务

================================================================================
🎯 MySQL Binlog 事务分析报告
================================================================================
📅 时间范围: 2025-10-28 06:00:002025-10-28 06:59:56
⏳ 时间跨度: 1.00 小时 (3596 秒)

🏆 事务大小 TOP10 (字节):
──────────────────────────────────────────────────────────────────────
   # 1 事务 784:      283,583 bytes | 时长:    0.000 ms | 事件数: 1832
   # 2 事务 623:      156,506 bytes | 时长:    0.000 ms | 事件数: 1017
   # 3 事务 647:      108,348 bytes | 时长:    0.000 ms | 事件数:  713
   # 4 事务 640:       99,310 bytes | 时长:    0.000 ms | 事件数:  645
   # 5 事务 971:       38,588 bytes | 时长:    0.000 ms | 事件数:  250
   # 6 事务 823:       37,532 bytes | 时长:    0.000 ms | 事件数:  243
   # 7 事务 608:       36,152 bytes | 时长:    0.000 ms | 事件数:  234
   # 8 事务 802:       32,289 bytes | 时长:    0.000 ms | 事件数:  209
   # 9 事务 1142:       26,136 bytes | 时长:    0.000 ms | 事件数:  171
   #10 事务 821:       21,267 bytes | 时长:    0.000 ms | 事件数:  139

⏱️  事务时长 TOP10 (毫秒):
──────────────────────────────────────────────────────────────────────
   # 1 事务 268:     1000.000 ms | 大小:      5,616 bytes | 事件数:    9
   # 2 事务 565:     1000.000 ms | 大小:      9,179 bytes | 事件数:    9
   # 3 事务 811:     1000.000 ms | 大小:        506 bytes | 事件数:    2
   # 4 事务 1045:     1000.000 ms | 大小:      8,046 bytes | 事件数:    9
   # 5 事务   1:        0.000 ms | 大小:     18,897 bytes | 事件数:  123
   # 6 事务   2:        0.000 ms | 大小:     18,897 bytes | 事件数:  123
   # 7 事务   3:        0.000 ms | 大小:        284 bytes | 事件数:    1
   # 8 事务   4:        0.000 ms | 大小:        481 bytes | 事件数:    1
   # 9 事务   5:        0.000 ms | 大小:      3,556 bytes | 事件数:    8
   #10 事务   6:        0.000 ms | 大小:      3,276 bytes | 事件数:    3

📊 事务事件数 TOP10:
──────────────────────────────────────────────────────────────────────
   # 1 事务 784:     1832 个事件 | 大小:    283,583 bytes | 时长:    0.000 ms
   # 2 事务 623:     1017 个事件 | 大小:    156,506 bytes | 时长:    0.000 ms
   # 3 事务 647:      713 个事件 | 大小:    108,348 bytes | 时长:    0.000 ms
   # 4 事务 640:      645 个事件 | 大小:     99,310 bytes | 时长:    0.000 ms
   # 5 事务 971:      250 个事件 | 大小:     38,588 bytes | 时长:    0.000 ms
   # 6 事务 823:      243 个事件 | 大小:     37,532 bytes | 时长:    0.000 ms
   # 7 事务 608:      234 个事件 | 大小:     36,152 bytes | 时长:    0.000 ms
   # 8 事务 802:      209 个事件 | 大小:     32,289 bytes | 时长:    0.000 ms
   # 9 事务 1142:      171 个事件 | 大小:     26,136 bytes | 时长:    0.000 ms
   #10 事务 821:      139 个事件 | 大小:     21,267 bytes | 时长:    0.000 ms

📈 总体事务统计:
──────────────────────────────────────────────────
   📦 事务总数:     1159
   📊 总事件数:    12931
   ⏱️  总时长:     4000.000 ms
   💾 总大小:    3,502,955 bytes
   📊 平均时长:        3.451 ms
   📏 平均大小:        3,022 bytes
   🔢 平均事件数:       11.2

🛠️  DML操作统计:
──────────────────────────────────────────────────
   🔍 事件数验证: DML事件总数 = 12931, 总事件数 = 12931
   🔍 事务数验证: DML事务数之和 = 1307, 实际DML事务数 = 1159
   💡 说明: 一个事务可能包含多种DML操作,因此DML事务数之和可能大于实际DML事务数
   DELETE:
        事件数:     1599 ( 12.4%)
        事务数:       13 (  1.1%)
        总大小:      245,661 bytes
        总时长:        0.000 ms
   INSERT:
        事件数:     9935 ( 76.8%)
        事务数:      544 ( 46.9%)
        总大小:    1,965,384 bytes
        总时长:     2333.333 ms
   UPDATE:
        事件数:     1397 ( 10.8%)
        事务数:      750 ( 64.7%)
        总大小:    1,291,910 bytes
        总时长:     1666.667 ms

📥 INSERT操作 TOP10 表:
──────────────────────────────────────────────────
   # 1 wallet_m_1.t_report_detail                 8745 次事件,  225 个事务
   # 2 wallet_m_2.t_report_detail                  234 次事件,    1 个事务
   # 3 wallet_m_2.t_task                           198 次事件,   84 个事务
   # 4 wallet_m_1.t_task                           146 次事件,   82 个事务
   # 5 wallet_m_2.t_transaction_record              77 次事件,   77 个事务
   # 6 wallet_m_2.t_transaction_record_detail       77 次事件,   77 个事务
   # 7 wallet_m_1.t_transaction_record_detail       69 次事件,   67 个事务
   # 8 wallet_m_1.t_transaction_record              67 次事件,   67 个事务
   # 9 wallet_m_2.t_payment                         61 次事件,   61 个事务
   #10 wallet_m_1.t_payment                         50 次事件,   50 个事务

📤 DELETE操作 TOP10 表:
──────────────────────────────────────────────────
   # 1 wallet_m_1.t_report_detail                 1599 次事件,   13 个事务

🔄 UPDATE操作 TOP10 表:
──────────────────────────────────────────────────
   # 1 wallet_m_2.t_task                           396 次事件,  273 个事务
   # 2 wallet_m_1.t_task                           292 次事件,  227 个事务
   # 3 wallet_m_2.t_payment                        194 次事件,   86 个事务
   # 4 wallet_m_1.t_payment                        110 次事件,   55 个事务
   # 5 wallet_m_2.t_account                         77 次事件,   77 个事务
   # 6 wallet_m_2.t_account_detail                  77 次事件,   77 个事务
   # 7 wallet_m_1.t_account_detail                  69 次事件,   67 个事务
   # 8 wallet_m_1.t_account                         67 次事件,   67 个事务
   # 9 wallet_100343.t_task                         22 次事件,   15 个事务
   #10 wallet_m_2.t_recharge                        20 次事件,   20 个事务

================================================================================
✅ 分析完成
✅ HTML报告已生成: 000680_01_report.html
View Code

4、HTML效果

image

 

 posted on 2025-10-28 11:35  xibuhaohao  阅读(1)  评论(0)    收藏  举报