mysql导出sql脚本,通过python脚本生成数据库word文档
mysql导出sql脚本,通过python脚本生成数据库word文档
import re from docx import Document from docx.shared import Cm, Pt from docx.enum.text import WD_ALIGN_PARAGRAPH import os def extract_all_tables_from_sql_file(file_path): """从SQL文件中提取所有表结构""" tables = {} try: with open(file_path, 'r', encoding='utf-8', errors='ignore') as f: content = f.read() # 查找所有CREATE TABLE语句 create_table_pattern = r'CREATE\s+TABLE\s+(?:IF\s+NOT\s+EXISTS\s+)?`?([a-zA-Z_][a-zA-Z0-9_]*)`?\s*\((.*?)\)\s*ENGINE\s*=' matches = re.findall(create_table_pattern, content, re.DOTALL | re.IGNORECASE) print(f"找到 {len(matches)} 个表定义") for i, (table_name, table_body) in enumerate(matches, 1): print(f"解析表 {i}: {table_name}") fields = parse_table_body_simple(table_body) if fields: tables[table_name] = fields print(f" -> 找到 {len(fields)} 个字段") else: print(f" -> 警告: 未找到字段") except Exception as e: print(f"读取文件时出错: {e}") return tables def parse_table_body_simple(table_body): """简单解析表体""" fields = [] # 清理表体 table_body = table_body.strip() # 分割字段行 lines = [] current_line = '' in_quotes = False quote_char = '' paren_depth = 0 for char in table_body: if char in ("'", '"') and (not in_quotes or char == quote_char): in_quotes = not in_quotes if in_quotes: quote_char = char else: quote_char = '' current_line += char elif char == '(' and not in_quotes: paren_depth += 1 current_line += char elif char == ')' and not in_quotes and paren_depth > 0: paren_depth -= 1 current_line += char elif char == ',' and not in_quotes and paren_depth == 0: lines.append(current_line.strip()) current_line = '' else: current_line += char if current_line.strip(): lines.append(current_line.strip()) # 解析每个字段 for line in lines: line = line.strip() if not line: continue # 跳过索引和约束 if re.match(r'^(PRIMARY|UNIQUE|KEY|INDEX|CONSTRAINT|FOREIGN|CHECK)', line, re.I): continue # 匹配字段定义 # 模式1: 带反引号的字段名 pattern1 = r'`([^`]+)`\s+([a-zA-Z]+(?:\(\d+(?:,\s*\d+)?\))?)' # 模式2: 不带反引号的字段名 pattern2 = r'(\w+)\s+([a-zA-Z]+(?:\(\d+(?:,\s*\d+)?\))?)' match = re.match(pattern1, line) or re.match(pattern2, line) if not match: continue field_name = match.group(1) data_type = match.group(2) # 获取剩余部分 remaining = line[match.end():].strip() # 分析属性 is_nullable = 'NOT NULL' not in remaining.upper() is_auto_increment = 'AUTO_INCREMENT' in remaining.upper() is_primary_key = 'PRIMARY KEY' in remaining.upper() # 提取注释 comment = '' comment_match = re.search(r"COMMENT\s+['\"]([^'\"]*)['\"]", remaining, re.I) if comment_match: comment = comment_match.group(1) fields.append({ '字段名': field_name, '数据类型': data_type, '是否主键': '是' if is_primary_key else '否', '是否自增': '是' if is_auto_increment else '否', '允许为空': '是' if is_nullable else '否', '注释': comment }) return fields def create_word_document_for_tables(tables, output_path='数据库表结构完整版.docx'): """为所有表创建Word文档""" doc = Document() # 设置样式 style = doc.styles['Normal'] style.font.name = '宋体' style.font.size = Pt(10.5) # 标题 title = doc.add_heading('数据库表结构文档 - 完整版', 0) title.alignment = WD_ALIGN_PARAGRAPH.CENTER # 统计信息 total_tables = len(tables) total_fields = sum(len(fields) for fields in tables.values()) info = doc.add_paragraph() info.add_run(f'表总数: {total_tables}').bold = True info.add_run(f'\n字段总数: {total_fields}').bold = True info.add_run(f'\n生成时间: 2026-04-02\n\n') # 表列表 doc.add_heading('表列表', level=1) for i, table_name in enumerate(sorted(tables.keys()), 1): field_count = len(tables[table_name]) doc.add_paragraph(f'{i}. {table_name} ({field_count}个字段)', style='List Number') doc.add_page_break() # 详细表结构 doc.add_heading('表结构详情', level=1) for i, table_name in enumerate(sorted(tables.keys()), 1): fields = tables[table_name] # 表标题 doc.add_heading(f'{i}. 表名: {table_name}', level=2) doc.add_paragraph(f'字段数量: {len(fields)}个\n') if fields: # 创建表格 table = doc.add_table(rows=1, cols=6) table.style = 'Table Grid' # 表头 headers = ['字段名', '数据类型', '是否主键', '是否自增', '允许为空', '注释'] for col_idx, header in enumerate(headers): cell = table.rows[0].cells[col_idx] cell.text = header for para in cell.paragraphs: for run in para.runs: run.font.bold = True # 填充数据 for field in fields: row = table.add_row() row.cells[0].text = field['字段名'] row.cells[1].text = field['数据类型'] row.cells[2].text = field['是否主键'] row.cells[3].text = field['是否自增'] row.cells[4].text = field['允许为空'] row.cells[5].text = field['注释'] # 设置列宽 widths = [Cm(2.8), Cm(2.5), Cm(1.8), Cm(1.8), Cm(1.8), Cm(5.5)] for row in table.rows: for col_idx, cell in enumerate(row.cells): if col_idx < len(widths): cell.width = widths[col_idx] else: doc.add_paragraph('警告: 该表未解析到字段信息', style='Intense Quote') # 添加间距 doc.add_paragraph() # 如果不是最后一个表,添加分页 if i < total_tables: doc.add_page_break() # 保存文档 doc.save(output_path) return output_path def main(): """主函数""" print("="*60) print("SQL转Word文档生成器 - 增强版") print("="*60) # 配置 sql_file = "sql/risk_system_db.sql" # 修改为您的SQL文件名 output_file = "doc/risk_system_db.docx" # 修改为您的WORD文件名 if not os.path.exists(sql_file): print(f"错误: 找不到文件 '{sql_file}'") print("请确保文件存在于当前目录") # 显示当前目录文件 print("\n当前目录文件:") files = os.listdir('.') sql_files = [f for f in files if f.lower().endswith('.sql')] for f in sql_files: print(f" - {f}") if sql_files: print(f"\n建议: 修改代码中的 sql_file 变量为 '{sql_files[0]}'") return print(f"处理SQL文件: {sql_file}") # 解析SQL文件 tables = extract_all_tables_from_sql_file(sql_file) if not tables: print("错误: 未解析到任何表结构") print("可能原因:") print("1. SQL文件格式不正确") print("2. 文件编码不是UTF-8") print("3. SQL语句格式不标准") return print(f"\n解析完成:") print(f" 表总数: {len(tables)}") total_fields = sum(len(fields) for fields in tables.values()) print(f" 字段总数: {total_fields}") # 显示表信息 print("\n表详情:") for i, (table_name, fields) in enumerate(sorted(tables.items()), 1): print(f" {i:2d}. {table_name:30s} - {len(fields):3d} 个字段") if len(fields) > 0: print(f" 示例字段: {fields[0]['字段名']} ({fields[0]['数据类型']})") # 生成Word文档 print(f"\n生成Word文档...") output_path = create_word_document_for_tables(tables, output_file) print(f"✓ 文档已生成: {output_path}") print(f"✓ 请查看生成的文档验证字段数量") if __name__ == "__main__": main()
# 配置,主要修改以下2个地方,在python编辑器或者服务器上面执行。
sql_file = "sql/risk_system_db.sql" # 修改为您的SQL文件名
output_file = "doc/risk_system_db.docx" # 修改为您的WORD文件名
# 执行命令 - 基本用法
python 文件名.py
# 如果系统中有多个Python版本
python3 文件名.py
浙公网安备 33010602011771号