根据mysqlbinlog 解析binlog,并使用解析后的数据,分析获取需要的各种指标
数据库版本:mysql5.7.23 未开启gtid
1、mysqlbinlog 分析
mysqlbinlog --base64-output=decode-rows -vv --start-datetime='2025-10-28 06:00:00' --stop-datetime='2025-10-28 06:59:59' 3306-bin.000680 > 000680_01.sh
2、Python解析
import re import datetime import sys import argparse from collections import defaultdict from pathlib import Path class BinlogAnalyzer: def __init__(self): self.transactions = [] self.current_transaction = None self.last_timestamp = None self.last_position = 0 def parse_file(self, file_path): """解析binlog文件""" try: with open(file_path, 'r', encoding='utf-8') as f: lines = f.readlines() except FileNotFoundError: print(f"错误: 文件 {file_path} 不存在") return False except Exception as e: print(f"读取文件时出错: {e}") return False print(f"正在解析文件: {file_path}") print(f"文件行数: {len(lines)}") transaction_count = 0 for i, line in enumerate(lines): line = line.strip() # 解析位置和时间戳信息 self._parse_position_and_timestamp(line) # 检测事务开始 if line == 'BEGIN' and self.current_transaction is None: self.current_transaction = { 'start_pos': self.last_position, 'start_time': self.last_timestamp, 'events': [], 'dml_operations': defaultdict(int), 'tables_affected': set(), 'transaction_id': transaction_count + 1 } # 检测事务提交 elif line.startswith('COMMIT') and self.current_transaction: self.current_transaction['end_pos'] = self.last_position self.current_transaction['end_time'] = self.last_timestamp self._finalize_transaction() transaction_count += 1 # 检测DML操作 elif line.startswith('###') and self.current_transaction: self._parse_dml_operation(line) print(f"解析完成,找到 {transaction_count} 个事务") return True def _parse_position_and_timestamp(self, line): """解析位置和时间戳信息""" # 解析位置信息 if line.startswith('# at '): try: self.last_position = int(line.split()[2]) except (IndexError, ValueError): pass # 解析时间戳信息 if line.startswith('#') and 'server id' in line and 'end_log_pos' in line: parts = line.split() if len(parts) >= 2: date_part = parts[0][1:] # 移除开头的# time_part = parts[1] try: # 修复:添加年份前缀,假设为20xx年 # 从 "251028 06:00:00" 转换为 "2025-10-28 06:00:00" year_prefix = "20" if date_part[:2] <= "50" else "19" full_date = f"{year_prefix}{date_part[:2]}-{date_part[2:4]}-{date_part[4:6]}" datetime_str = f"{full_date} {time_part}" self.last_timestamp = datetime.datetime.strptime(datetime_str, "%Y-%m-%d %H:%M:%S") except ValueError as e: print(f"时间解析错误: {datetime_str}, 错误: {e}") pass def _parse_dml_operation(self, line): """解析DML操作""" # 更精确地检测DML操作类型 if 'INSERT INTO' in line: op_type = 'INSERT' elif 'UPDATE' in line and not line.startswith('### WHERE') and not line.startswith('### SET'): # UPDATE语句的第一行包含表名 op_type = 'UPDATE' elif 'DELETE FROM' in line: op_type = 'DELETE' else: return # 提取表名 - 针对不同的DML语句使用不同的正则表达式 full_table_name = None # INSERT语句: INSERT INTO `db`.`table` if op_type == 'INSERT': table_match = re.search(r'INTO `([^`]+)`\.`([^`]+)`', line) if table_match: database = table_match.group(1) table = table_match.group(2) full_table_name = f"{database}.{table}" # UPDATE语句: UPDATE `db`.`table` elif op_type == 'UPDATE': table_match = re.search(r'UPDATE `([^`]+)`\.`([^`]+)`', line) if table_match: database = table_match.group(1) table = table_match.group(2) full_table_name = f"{database}.{table}" # DELETE语句: DELETE FROM `db`.`table` elif op_type == 'DELETE': table_match = re.search(r'FROM `([^`]+)`\.`([^`]+)`', line) if table_match: database = table_match.group(1) table = table_match.group(2) full_table_name = f"{database}.{table}" if full_table_name: self.current_transaction['events'].append({ 'type': op_type, 'table': full_table_name }) self.current_transaction['dml_operations'][op_type] += 1 self.current_transaction['tables_affected'].add(full_table_name) def _finalize_transaction(self): """完成事务处理""" if (self.current_transaction and self.current_transaction.get('start_time') and self.current_transaction.get('end_time') and self.current_transaction.get('start_pos') is not None and self.current_transaction.get('end_pos') is not None): # 计算事务大小和时长 start_pos = self.current_transaction['start_pos'] end_pos = self.current_transaction['end_pos'] # 确保大小为正数 self.current_transaction['size'] = abs(end_pos - start_pos) start_time = self.current_transaction['start_time'] end_time = self.current_transaction['end_time'] # 计算毫秒级时长 duration_ms = (end_time - start_time).total_seconds() * 1000 self.current_transaction['duration_ms'] = max(duration_ms, 0) # 确保非负 self.current_transaction['event_count'] = len(self.current_transaction['events']) self.transactions.append(self.current_transaction) self.current_transaction = None def generate_statistics(self): """生成统计报告""" if not self.transactions: return { 'error': '没有找到事务数据', 'total_transactions': 0 } stats = {} # 1. 统计每个事务的大小的TOP10 - 同时包含时长和事件数 stats['top10_transaction_sizes'] = sorted( [(t['size'], t['transaction_id'], t['duration_ms'], t['event_count']) for t in self.transactions], key=lambda x: x[0], reverse=True )[:10] # 2. 统计每个事务时长的TOP10 (毫秒) - 同时包含大小和事件数 stats['top10_transaction_durations'] = sorted( [(t['duration_ms'], t['transaction_id'], t['size'], t['event_count']) for t in self.transactions], key=lambda x: x[0], reverse=True )[:10] # 3. 统计每个事务event的TOP10 - 同时包含大小和时长 stats['top10_transaction_events'] = sorted( [(t['event_count'], t['transaction_id'], t['size'], t['duration_ms']) for t in self.transactions], key=lambda x: x[0], reverse=True )[:10] # 4. 统计所有事务的个数、总时长、总大小 stats['total_transactions'] = len(self.transactions) stats['total_duration_ms'] = sum(t['duration_ms'] for t in self.transactions) stats['total_size'] = sum(t['size'] for t in self.transactions) stats['avg_duration_ms'] = stats['total_duration_ms'] / stats['total_transactions'] if stats['total_transactions'] > 0 else 0 stats['avg_size'] = stats['total_size'] / stats['total_transactions'] if stats['total_transactions'] > 0 else 0 # 修复:统计总事件数 total_events = sum(t['event_count'] for t in self.transactions) stats['total_events'] = total_events stats['avg_events'] = total_events / stats['total_transactions'] if stats['total_transactions'] > 0 else 0 # 5. 修复DML操作统计 - 正确统计每个DML类型的事务数 dml_stats = defaultdict(lambda: { 'events_count': 0, 'transactions_set': set(), # 使用集合避免重复计数 'total_size': 0, 'total_duration_ms': 0 }) # 修复:为每个事务计算DML统计 for transaction in self.transactions: # 获取该事务中每种DML操作的事件数 transaction_dml_ops = transaction['dml_operations'] for op_type, event_count in transaction_dml_ops.items(): if event_count > 0: dml_stats[op_type]['events_count'] += event_count dml_stats[op_type]['transactions_set'].add(transaction['transaction_id']) # 修复:按比例分配事务的size和duration到各个DML类型 total_events_in_trans = transaction['event_count'] if total_events_in_trans > 0: ratio = event_count / total_events_in_trans dml_stats[op_type]['total_size'] += transaction['size'] * ratio dml_stats[op_type]['total_duration_ms'] += transaction['duration_ms'] * ratio # 修复:验证DML事务数统计 dml_transaction_ids = set() for op_stats in dml_stats.values(): dml_transaction_ids.update(op_stats['transactions_set']) stats['dml_total_transactions'] = len(dml_transaction_ids) # 转换集合为计数 for op_type in dml_stats: dml_stats[op_type]['transactions_count'] = len(dml_stats[op_type]['transactions_set']) del dml_stats[op_type]['transactions_set'] # 删除原始集合 stats['dml_operations'] = dict(dml_stats) # 6. 统计DDL操作的个数 stats['ddl_operations'] = { 'CREATE': 0, 'ALTER': 0, 'DROP': 0, 'TRUNCATE': 0 } # 7. 统计表DML操作的TOP10 - 同时统计事件次数和事务次数 table_operations = defaultdict(lambda: { 'INSERT': {'events_count': 0, 'transactions_count': set()}, 'UPDATE': {'events_count': 0, 'transactions_count': set()}, 'DELETE': {'events_count': 0, 'transactions_count': set()} }) for transaction in self.transactions: for event in transaction['events']: table = event['table'] op_type = event['type'] table_operations[table][op_type]['events_count'] += 1 table_operations[table][op_type]['transactions_count'].add(transaction['transaction_id']) # 转换为更容易处理的格式 stats['top10_insert_tables'] = sorted( [(table, ops['INSERT']['events_count'], len(ops['INSERT']['transactions_count'])) for table, ops in table_operations.items() if ops['INSERT']['events_count'] > 0], key=lambda x: x[1], reverse=True )[:10] stats['top10_update_tables'] = sorted( [(table, ops['UPDATE']['events_count'], len(ops['UPDATE']['transactions_count'])) for table, ops in table_operations.items() if ops['UPDATE']['events_count'] > 0], key=lambda x: x[1], reverse=True )[:10] stats['top10_delete_tables'] = sorted( [(table, ops['DELETE']['events_count'], len(ops['DELETE']['transactions_count'])) for table, ops in table_operations.items() if ops['DELETE']['events_count'] > 0], key=lambda x: x[1], reverse=True )[:10] # 添加时间范围信息 if self.transactions: start_times = [t['start_time'] for t in self.transactions] end_times = [t['end_time'] for t in self.transactions] stats['time_range'] = { 'start': min(start_times), 'end': max(end_times) } # 修复:计算实际时间跨度 time_span = stats['time_range']['end'] - stats['time_range']['start'] stats['time_span_seconds'] = time_span.total_seconds() stats['time_span_hours'] = time_span.total_seconds() / 3600 return stats def print_console_report(self): """打印控制台统计报告""" stats = self.generate_statistics() if stats.get('error'): print(f"\n❌ {stats['error']}") return print("\n" + "=" * 80) print("🎯 MySQL Binlog 事务分析报告") print("=" * 80) # 时间范围 if 'time_range' in stats: time_range = stats['time_range'] print(f"📅 时间范围: {time_range['start']} 到 {time_range['end']}") print(f"⏳ 时间跨度: {stats['time_span_hours']:.2f} 小时 ({stats['time_span_seconds']:.0f} 秒)") # 1. 事务大小TOP10 - 补充时长和事件数 print(f"\n🏆 事务大小 TOP10 (字节):") print("─" * 70) if stats['top10_transaction_sizes']: for i, (size, trans_id, duration_ms, event_count) in enumerate(stats['top10_transaction_sizes'], 1): print(f" #{i:2d} 事务 {trans_id:3d}: {size:>12,} bytes | 时长: {duration_ms:>8.3f} ms | 事件数: {event_count:>4}") else: print(" 📝 无数据") # 2. 事务时长TOP10 (毫秒) - 补充大小和事件数 print(f"\n⏱️ 事务时长 TOP10 (毫秒):") print("─" * 70) if stats['top10_transaction_durations']: for i, (duration_ms, trans_id, size, event_count) in enumerate(stats['top10_transaction_durations'], 1): print(f" #{i:2d} 事务 {trans_id:3d}: {duration_ms:>12.3f} ms | 大小: {size:>10,} bytes | 事件数: {event_count:>4}") else: print(" 📝 无数据") # 3. 事务事件数TOP10 - 补充大小和时长 print(f"\n📊 事务事件数 TOP10:") print("─" * 70) if stats['top10_transaction_events']: for i, (event_count, trans_id, size, duration_ms) in enumerate(stats['top10_transaction_events'], 1): print(f" #{i:2d} 事务 {trans_id:3d}: {event_count:>8} 个事件 | 大小: {size:>10,} bytes | 时长: {duration_ms:>8.3f} ms") else: print(" 📝 无数据") # 4. 总体统计 print(f"\n📈 总体事务统计:") print("─" * 50) print(f" 📦 事务总数: {stats['total_transactions']:>8}") print(f" 📊 总事件数: {stats['total_events']:>8}") print(f" ⏱️ 总时长: {stats['total_duration_ms']:>12.3f} ms") print(f" 💾 总大小: {stats['total_size']:>12,} bytes") print(f" 📊 平均时长: {stats['avg_duration_ms']:>12.3f} ms") print(f" 📏 平均大小: {stats['avg_size']:>12,.0f} bytes") print(f" 🔢 平均事件数: {stats['avg_events']:>10.1f}") # 5. DML操作统计 print(f"\n🛠️ DML操作统计:") print("─" * 50) if stats['dml_operations']: # 验证:DML事件总数应该等于总事件数 dml_total_events = sum(op_stats['events_count'] for op_stats in stats['dml_operations'].values()) print(f" 🔍 事件数验证: DML事件总数 = {dml_total_events}, 总事件数 = {stats['total_events']}") # 验证:DML事务数统计 dml_trans_sum = sum(op_stats['transactions_count'] for op_stats in stats['dml_operations'].values()) print(f" 🔍 事务数验证: DML事务数之和 = {dml_trans_sum}, 实际DML事务数 = {stats['dml_total_transactions']}") print(f" 💡 说明: 一个事务可能包含多种DML操作,因此DML事务数之和可能大于实际DML事务数") for op_type, op_stats in stats['dml_operations'].items(): if op_stats['events_count'] > 0: print(f" {op_type:>6}:") print(f" 事件数: {op_stats['events_count']:>8} ({op_stats['events_count']/stats['total_events']*100:>5.1f}%)") print(f" 事务数: {op_stats['transactions_count']:>8} ({op_stats['transactions_count']/stats['dml_total_transactions']*100:>5.1f}%)") print(f" 总大小: {op_stats['total_size']:>12,.0f} bytes") print(f" 总时长: {op_stats['total_duration_ms']:>12.3f} ms") else: print(" 📝 无DML操作") # 7. 表DML操作TOP10 - 同时显示事件次数和事务次数 print(f"\n📥 INSERT操作 TOP10 表:") print("─" * 50) if stats['top10_insert_tables']: for i, (table, event_count, trans_count) in enumerate(stats['top10_insert_tables'], 1): print(f" #{i:2d} {table:<40} {event_count:>6} 次事件, {trans_count:>4} 个事务") else: print(" 📝 无INSERT操作") print(f"\n📤 DELETE操作 TOP10 表:") print("─" * 50) if stats['top10_delete_tables']: for i, (table, event_count, trans_count) in enumerate(stats['top10_delete_tables'], 1): print(f" #{i:2d} {table:<40} {event_count:>6} 次事件, {trans_count:>4} 个事务") else: print(" 📝 无DELETE操作") print(f"\n🔄 UPDATE操作 TOP10 表:") print("─" * 50) if stats['top10_update_tables']: for i, (table, event_count, trans_count) in enumerate(stats['top10_update_tables'], 1): print(f" #{i:2d} {table:<40} {event_count:>6} 次事件, {trans_count:>4} 个事务") else: print(" 📝 无UPDATE操作") print("\n" + "=" * 80) print("✅ 分析完成") def generate_html_report(self, output_file=None): """生成HTML格式的报告""" stats = self.generate_statistics() if not output_file: input_file = Path(sys.argv[1] if len(sys.argv) > 1 else "unknown") output_file = f"{input_file.stem}_report.html" html_content = f""" <!DOCTYPE html> <html lang="zh-CN"> <head> <meta charset="UTF-8"> <meta name="viewport" content="width=device-width, initial-scale=1.0"> <title>MySQL Binlog 分析报告</title> <style> body {{ font-family: Arial, sans-serif; margin: 20px; background-color: #f5f5f5; }} .container {{ max-width: 1200px; margin: 0 auto; background: white; padding: 20px; border-radius: 10px; box-shadow: 0 2px 10px rgba(0,0,0,0.1); }} .header {{ text-align: center; background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); color: white; padding: 20px; border-radius: 8px; margin-bottom: 30px; }} .section {{ margin-bottom: 30px; padding: 20px; border: 1px solid #e0e0e0; border-radius: 8px; background: #fafafa; }} .section-title {{ color: #333; border-bottom: 2px solid #667eea; padding-bottom: 10px; margin-bottom: 15px; }} table {{ width: 100%; border-collapse: collapse; margin-top: 10px; }} th, td {{ padding: 12px; text-align: left; border-bottom: 1px solid #ddd; }} th {{ background-color: #667eea; color: white; }} tr:nth-child(even) {{ background-color: #f8f9fa; }} tr:hover {{ background-color: #e9ecef; }} .metric {{ display: inline-block; margin: 10px; padding: 15px; background: white; border-radius: 8px; box-shadow: 0 2px 5px rgba(0,0,0,0.1); min-width: 150px; text-align: center; }} .metric-value {{ font-size: 24px; font-weight: bold; color: #667eea; }} .metric-label {{ font-size: 14px; color: #666; }} .top10-table {{ font-size: 14px; }} .timestamp {{ color: #666; font-size: 12px; text-align: right; }} .note {{ background: #fff3cd; border: 1px solid #ffeaa7; border-radius: 4px; padding: 10px; margin: 10px 0; }} .validation {{ background: #d1ecf1; border: 1px solid #bee5eb; border-radius: 4px; padding: 10px; margin: 10px 0; }} .warning {{ background: #f8d7da; border: 1px solid #f5c6cb; border-radius: 4px; padding: 10px; margin: 10px 0; }} </style> </head> <body> <div class="container"> <div class="header"> <h1>📊 MySQL Binlog 事务分析报告</h1> <p>生成时间: {datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')}</p> </div> """ if stats.get('error'): html_content += f""" <div class="section"> <h2 class="section-title">❌ 错误</h2> <p>{stats['error']}</p> </div> """ else: # 总体指标 html_content += """ <div class="section"> <h2 class="section-title">📈 总体指标</h2> <div style="text-align: center;"> """ metrics = [ (stats['total_transactions'], '事务总数'), (stats['total_events'], '总事件数'), (f"{stats['total_duration_ms']:.2f}ms", '总时长'), (f"{stats['total_size']:,}", '总大小(bytes)'), (f"{stats['avg_duration_ms']:.2f}ms", '平均时长'), (f"{stats['avg_size']:,.0f}", '平均大小(bytes)'), (f"{stats['avg_events']:.1f}", '平均事件数') ] for value, label in metrics: html_content += f""" <div class="metric"> <div class="metric-value">{value}</div> <div class="metric-label">{label}</div> </div> """ html_content += """ </div> </div> """ # 时间范围 if 'time_range' in stats: time_range = stats['time_range'] html_content += f""" <div class="section"> <h2 class="section-title">📅 时间范围</h2> <p><strong>开始:</strong> {time_range['start']}</p> <p><strong>结束:</strong> {time_range['end']}</p> <p><strong>时间跨度:</strong> {stats['time_span_hours']:.2f} 小时 ({stats['time_span_seconds']:.0f} 秒)</p> </div> """ # TOP10 表格 def create_top10_table(title, data, primary_field, secondary_fields): if not data: return f"<p>无数据</p>" table_html = f""" <table class="top10-table"> <tr> <th>排名</th> <th>事务ID</th> <th>{primary_field}</th> """ for field in secondary_fields: table_html += f"<th>{field}</th>" table_html += "</tr>" for i, item in enumerate(data, 1): table_html += f""" <tr> <td>#{i}</td> <td>事务 {item[1]}</td> <td>{item[0]}</td> """ for j in range(2, len(item)): if "bytes" in secondary_fields[j-2].lower(): table_html += f"<td>{item[j]:,}</td>" elif "ms" in secondary_fields[j-2].lower(): table_html += f"<td>{item[j]:.3f}</td>" else: table_html += f"<td>{item[j]}</td>" table_html += "</tr>" table_html += "</table>" return table_html html_content += f""" <div class="section"> <h2 class="section-title">🏆 TOP10 排名</h2> <div style="display: grid; grid-template-columns: repeat(auto-fit, minmax(400px, 1fr)); gap: 20px;"> <div> <h3>💾 事务大小</h3> {create_top10_table("事务大小", stats['top10_transaction_sizes'], "大小(bytes)", ["时长(ms)", "事件数"])} </div> <div> <h3>⏱️ 事务时长</h3> {create_top10_table("事务时长", stats['top10_transaction_durations'], "时长(ms)", ["大小(bytes)", "事件数"])} </div> <div> <h3>📊 事务事件数</h3> {create_top10_table("事务事件数", stats['top10_transaction_events'], "事件数", ["大小(bytes)", "时长(ms)"])} </div> </div> </div> """ # DML操作统计 if stats['dml_operations']: # 添加验证信息 dml_total_events = sum(op_stats['events_count'] for op_stats in stats['dml_operations'].values()) dml_trans_sum = sum(op_stats['transactions_count'] for op_stats in stats['dml_operations'].values()) html_content += f""" <div class="section"> <h2 class="section-title">🛠️ DML操作统计</h2> <div class="validation"> <strong>数据验证:</strong><br> - 事件数: DML事件总数 = {dml_total_events}, 总事件数 = {stats['total_events']}, 匹配状态: <strong>{'✅ 匹配' if dml_total_events == stats['total_events'] else '❌ 不匹配'}</strong><br> - 事务数: DML事务数之和 = {dml_trans_sum}, 实际DML事务数 = {stats['dml_total_transactions']} </div> <div class="note"> <strong>注意:</strong>一个事务可能包含多种DML操作,因此每个DML类型统计的事务数可能会重复计算同一个事务,导致DML事务数之和大于实际DML事务数。 </div> <table> <tr> <th>操作类型</th> <th>事件数</th> <th>占比</th> <th>事务数</th> <th>占比</th> <th>总大小(bytes)</th> <th>总时长(ms)</th> </tr> """ for op_type, op_stats in stats['dml_operations'].items(): if op_stats['events_count'] > 0: event_percent = op_stats['events_count'] / stats['total_events'] * 100 trans_percent = op_stats['transactions_count'] / stats['dml_total_transactions'] * 100 html_content += f""" <tr> <td>{op_type}</td> <td>{op_stats['events_count']}</td> <td>{event_percent:.1f}%</td> <td>{op_stats['transactions_count']}</td> <td>{trans_percent:.1f}%</td> <td>{op_stats['total_size']:,.0f}</td> <td>{op_stats['total_duration_ms']:.2f}</td> </tr> """ html_content += "</table></div>" # 表操作TOP10 def create_table_top10(title, data): if not data: return "<p>无数据</p>" table_html = f""" <table class="top10-table"> <tr> <th>排名</th> <th>表名</th> <th>事件次数</th> <th>事务次数</th> </tr> """ for i, (table, event_count, trans_count) in enumerate(data, 1): table_html += f""" <tr> <td>#{i}</td> <td>{table}</td> <td>{event_count}</td> <td>{trans_count}</td> </tr> """ table_html += "</table>" return table_html html_content += f""" <div class="section"> <h2 class="section-title">📋 表操作统计</h2> <div style="display: grid; grid-template-columns: repeat(auto-fit, minmax(300px, 1fr)); gap: 20px;"> <div> <h3>📥 INSERT操作TOP10</h3> {create_table_top10("INSERT", stats['top10_insert_tables'])} </div> <div> <h3>📤 DELETE操作TOP10</h3> {create_table_top10("DELETE", stats['top10_delete_tables'])} </div> <div> <h3>🔄 UPDATE操作TOP10</h3> {create_table_top10("UPDATE", stats['top10_update_tables'])} </div> </div> </div> """ # 添加说明 html_content += """ <div class="section"> <h2 class="section-title">📝 说明</h2> <div class="note"> <p><strong>注意:</strong></p> <ul> <li>事务大小计算基于binlog中的位置差值的绝对值</li> <li>事务时长计算基于事务开始和结束的时间戳差异</li> <li>DML操作统计中的总大小和总时长按事件数比例分配到各个操作类型</li> <li>表操作统计同时显示事件次数和涉及的事务次数</li> <li>一个事务可能包含多种DML操作,因此DML操作的事务数之和可能大于实际DML事务数</li> <li>DML事件总数应该等于总体统计中的总事件数</li> </ul> </div> </div> """ html_content += """ <div class="timestamp"> <p>报告生成工具: MySQL Binlog Analyzer</p> </div> </div> </body> </html> """ try: with open(output_file, 'w', encoding='utf-8') as f: f.write(html_content) print(f"✅ HTML报告已生成: {output_file}") return True except Exception as e: print(f"❌ 生成HTML报告失败: {e}") return False def main(): parser = argparse.ArgumentParser(description='MySQL Binlog事务分析工具') parser.add_argument('filename', help='要分析的binlog文件路径') parser.add_argument('--html', help='生成HTML报告文件路径', metavar='OUTPUT_FILE') parser.add_argument('--console', action='store_true', default=True, help='在控制台显示报告(默认)') parser.add_argument('--no-console', action='store_false', dest='console', help='不在控制台显示报告') args = parser.parse_args() if not Path(args.filename).exists(): print(f"错误: 文件 {args.filename} 不存在") sys.exit(1) analyzer = BinlogAnalyzer() # 解析文件 if not analyzer.parse_file(args.filename): sys.exit(1) # 生成报告 if args.console: analyzer.print_console_report() if args.html: analyzer.generate_html_report(args.html) else: # 默认也生成HTML报告 analyzer.generate_html_report() if __name__ == "__main__": main()
3、执行效果
[root@dbtbj binlog]# python3 binay.py 000680_01.sh 正在解析文件: 000680_01.sh 文件行数: 318223 解析完成,找到 1159 个事务 ================================================================================ 🎯 MySQL Binlog 事务分析报告 ================================================================================ 📅 时间范围: 2025-10-28 06:00:00 到 2025-10-28 06:59:56 ⏳ 时间跨度: 1.00 小时 (3596 秒) 🏆 事务大小 TOP10 (字节): ────────────────────────────────────────────────────────────────────── # 1 事务 784: 283,583 bytes | 时长: 0.000 ms | 事件数: 1832 # 2 事务 623: 156,506 bytes | 时长: 0.000 ms | 事件数: 1017 # 3 事务 647: 108,348 bytes | 时长: 0.000 ms | 事件数: 713 # 4 事务 640: 99,310 bytes | 时长: 0.000 ms | 事件数: 645 # 5 事务 971: 38,588 bytes | 时长: 0.000 ms | 事件数: 250 # 6 事务 823: 37,532 bytes | 时长: 0.000 ms | 事件数: 243 # 7 事务 608: 36,152 bytes | 时长: 0.000 ms | 事件数: 234 # 8 事务 802: 32,289 bytes | 时长: 0.000 ms | 事件数: 209 # 9 事务 1142: 26,136 bytes | 时长: 0.000 ms | 事件数: 171 #10 事务 821: 21,267 bytes | 时长: 0.000 ms | 事件数: 139 ⏱️ 事务时长 TOP10 (毫秒): ────────────────────────────────────────────────────────────────────── # 1 事务 268: 1000.000 ms | 大小: 5,616 bytes | 事件数: 9 # 2 事务 565: 1000.000 ms | 大小: 9,179 bytes | 事件数: 9 # 3 事务 811: 1000.000 ms | 大小: 506 bytes | 事件数: 2 # 4 事务 1045: 1000.000 ms | 大小: 8,046 bytes | 事件数: 9 # 5 事务 1: 0.000 ms | 大小: 18,897 bytes | 事件数: 123 # 6 事务 2: 0.000 ms | 大小: 18,897 bytes | 事件数: 123 # 7 事务 3: 0.000 ms | 大小: 284 bytes | 事件数: 1 # 8 事务 4: 0.000 ms | 大小: 481 bytes | 事件数: 1 # 9 事务 5: 0.000 ms | 大小: 3,556 bytes | 事件数: 8 #10 事务 6: 0.000 ms | 大小: 3,276 bytes | 事件数: 3 📊 事务事件数 TOP10: ────────────────────────────────────────────────────────────────────── # 1 事务 784: 1832 个事件 | 大小: 283,583 bytes | 时长: 0.000 ms # 2 事务 623: 1017 个事件 | 大小: 156,506 bytes | 时长: 0.000 ms # 3 事务 647: 713 个事件 | 大小: 108,348 bytes | 时长: 0.000 ms # 4 事务 640: 645 个事件 | 大小: 99,310 bytes | 时长: 0.000 ms # 5 事务 971: 250 个事件 | 大小: 38,588 bytes | 时长: 0.000 ms # 6 事务 823: 243 个事件 | 大小: 37,532 bytes | 时长: 0.000 ms # 7 事务 608: 234 个事件 | 大小: 36,152 bytes | 时长: 0.000 ms # 8 事务 802: 209 个事件 | 大小: 32,289 bytes | 时长: 0.000 ms # 9 事务 1142: 171 个事件 | 大小: 26,136 bytes | 时长: 0.000 ms #10 事务 821: 139 个事件 | 大小: 21,267 bytes | 时长: 0.000 ms 📈 总体事务统计: ────────────────────────────────────────────────── 📦 事务总数: 1159 📊 总事件数: 12931 ⏱️ 总时长: 4000.000 ms 💾 总大小: 3,502,955 bytes 📊 平均时长: 3.451 ms 📏 平均大小: 3,022 bytes 🔢 平均事件数: 11.2 🛠️ DML操作统计: ────────────────────────────────────────────────── 🔍 事件数验证: DML事件总数 = 12931, 总事件数 = 12931 🔍 事务数验证: DML事务数之和 = 1307, 实际DML事务数 = 1159 💡 说明: 一个事务可能包含多种DML操作,因此DML事务数之和可能大于实际DML事务数 DELETE: 事件数: 1599 ( 12.4%) 事务数: 13 ( 1.1%) 总大小: 245,661 bytes 总时长: 0.000 ms INSERT: 事件数: 9935 ( 76.8%) 事务数: 544 ( 46.9%) 总大小: 1,965,384 bytes 总时长: 2333.333 ms UPDATE: 事件数: 1397 ( 10.8%) 事务数: 750 ( 64.7%) 总大小: 1,291,910 bytes 总时长: 1666.667 ms 📥 INSERT操作 TOP10 表: ────────────────────────────────────────────────── # 1 wallet_m_1.t_report_detail 8745 次事件, 225 个事务 # 2 wallet_m_2.t_report_detail 234 次事件, 1 个事务 # 3 wallet_m_2.t_task 198 次事件, 84 个事务 # 4 wallet_m_1.t_task 146 次事件, 82 个事务 # 5 wallet_m_2.t_transaction_record 77 次事件, 77 个事务 # 6 wallet_m_2.t_transaction_record_detail 77 次事件, 77 个事务 # 7 wallet_m_1.t_transaction_record_detail 69 次事件, 67 个事务 # 8 wallet_m_1.t_transaction_record 67 次事件, 67 个事务 # 9 wallet_m_2.t_payment 61 次事件, 61 个事务 #10 wallet_m_1.t_payment 50 次事件, 50 个事务 📤 DELETE操作 TOP10 表: ────────────────────────────────────────────────── # 1 wallet_m_1.t_report_detail 1599 次事件, 13 个事务 🔄 UPDATE操作 TOP10 表: ────────────────────────────────────────────────── # 1 wallet_m_2.t_task 396 次事件, 273 个事务 # 2 wallet_m_1.t_task 292 次事件, 227 个事务 # 3 wallet_m_2.t_payment 194 次事件, 86 个事务 # 4 wallet_m_1.t_payment 110 次事件, 55 个事务 # 5 wallet_m_2.t_account 77 次事件, 77 个事务 # 6 wallet_m_2.t_account_detail 77 次事件, 77 个事务 # 7 wallet_m_1.t_account_detail 69 次事件, 67 个事务 # 8 wallet_m_1.t_account 67 次事件, 67 个事务 # 9 wallet_100343.t_task 22 次事件, 15 个事务 #10 wallet_m_2.t_recharge 20 次事件, 20 个事务 ================================================================================ ✅ 分析完成 ✅ HTML报告已生成: 000680_01_report.html
4、HTML效果

posted on
浙公网安备 33010602011771号