mysql导出sql脚本，通过python脚本生成数据库word文档

import re
from docx import Document
from docx.shared import Cm, Pt
from docx.enum.text import WD_ALIGN_PARAGRAPH
import os

def extract_all_tables_from_sql_file(file_path):
    """从SQL文件中提取所有表结构"""
    tables = {}
    
    try:
        with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
            content = f.read()
        
        # 查找所有CREATE TABLE语句
        create_table_pattern = r'CREATE\s+TABLE\s+(?:IF\s+NOT\s+EXISTS\s+)?`?([a-zA-Z_][a-zA-Z0-9_]*)`?\s*\((.*?)\)\s*ENGINE\s*='
        matches = re.findall(create_table_pattern, content, re.DOTALL | re.IGNORECASE)
        
        print(f"找到 {len(matches)} 个表定义")
        
        for i, (table_name, table_body) in enumerate(matches, 1):
            print(f"解析表 {i}: {table_name}")
            fields = parse_table_body_simple(table_body)
            if fields:
                tables[table_name] = fields
                print(f"  -> 找到 {len(fields)} 个字段")
            else:
                print(f"  -> 警告: 未找到字段")
        
    except Exception as e:
        print(f"读取文件时出错: {e}")
    
    return tables

def parse_table_body_simple(table_body):
    """简单解析表体"""
    fields = []
    
    # 清理表体
    table_body = table_body.strip()
    
    # 分割字段行
    lines = []
    current_line = ''
    in_quotes = False
    quote_char = ''
    paren_depth = 0
    
    for char in table_body:
        if char in ("'", '"') and (not in_quotes or char == quote_char):
            in_quotes = not in_quotes
            if in_quotes:
                quote_char = char
            else:
                quote_char = ''
            current_line += char
        elif char == '(' and not in_quotes:
            paren_depth += 1
            current_line += char
        elif char == ')' and not in_quotes and paren_depth > 0:
            paren_depth -= 1
            current_line += char
        elif char == ',' and not in_quotes and paren_depth == 0:
            lines.append(current_line.strip())
            current_line = ''
        else:
            current_line += char
    
    if current_line.strip():
        lines.append(current_line.strip())
    
    # 解析每个字段
    for line in lines:
        line = line.strip()
        if not line:
            continue
        
        # 跳过索引和约束
        if re.match(r'^(PRIMARY|UNIQUE|KEY|INDEX|CONSTRAINT|FOREIGN|CHECK)', line, re.I):
            continue
        
        # 匹配字段定义
        # 模式1: 带反引号的字段名
        pattern1 = r'`([^`]+)`\s+([a-zA-Z]+(?:\(\d+(?:,\s*\d+)?\))?)'
        # 模式2: 不带反引号的字段名
        pattern2 = r'(\w+)\s+([a-zA-Z]+(?:\(\d+(?:,\s*\d+)?\))?)'
        
        match = re.match(pattern1, line) or re.match(pattern2, line)
        if not match:
            continue
        
        field_name = match.group(1)
        data_type = match.group(2)
        
        # 获取剩余部分
        remaining = line[match.end():].strip()
        
        # 分析属性
        is_nullable = 'NOT NULL' not in remaining.upper()
        is_auto_increment = 'AUTO_INCREMENT' in remaining.upper()
        is_primary_key = 'PRIMARY KEY' in remaining.upper()
        
        # 提取注释
        comment = ''
        comment_match = re.search(r"COMMENT\s+['\"]([^'\"]*)['\"]", remaining, re.I)
        if comment_match:
            comment = comment_match.group(1)
        
        fields.append({
            '字段名': field_name,
            '数据类型': data_type,
            '是否主键': '是' if is_primary_key else '否',
            '是否自增': '是' if is_auto_increment else '否',
            '允许为空': '是' if is_nullable else '否',
            '注释': comment
        })
    
    return fields

def create_word_document_for_tables(tables, output_path='数据库表结构完整版.docx'):
    """为所有表创建Word文档"""
    doc = Document()
    
    # 设置样式
    style = doc.styles['Normal']
    style.font.name = '宋体'
    style.font.size = Pt(10.5)
    
    # 标题
    title = doc.add_heading('数据库表结构文档 - 完整版', 0)
    title.alignment = WD_ALIGN_PARAGRAPH.CENTER
    
    # 统计信息
    total_tables = len(tables)
    total_fields = sum(len(fields) for fields in tables.values())
    
    info = doc.add_paragraph()
    info.add_run(f'表总数: {total_tables}').bold = True
    info.add_run(f'\n字段总数: {total_fields}').bold = True
    info.add_run(f'\n生成时间: 2026-04-02\n\n')
    
    # 表列表
    doc.add_heading('表列表', level=1)
    for i, table_name in enumerate(sorted(tables.keys()), 1):
        field_count = len(tables[table_name])
        doc.add_paragraph(f'{i}. {table_name} ({field_count}个字段)', style='List Number')
    
    doc.add_page_break()
    
    # 详细表结构
    doc.add_heading('表结构详情', level=1)
    
    for i, table_name in enumerate(sorted(tables.keys()), 1):
        fields = tables[table_name]
        
        # 表标题
        doc.add_heading(f'{i}. 表名: {table_name}', level=2)
        doc.add_paragraph(f'字段数量: {len(fields)}个\n')
        
        if fields:
            # 创建表格
            table = doc.add_table(rows=1, cols=6)
            table.style = 'Table Grid'
            
            # 表头
            headers = ['字段名', '数据类型', '是否主键', '是否自增', '允许为空', '注释']
            for col_idx, header in enumerate(headers):
                cell = table.rows[0].cells[col_idx]
                cell.text = header
                for para in cell.paragraphs:
                    for run in para.runs:
                        run.font.bold = True
            
            # 填充数据
            for field in fields:
                row = table.add_row()
                row.cells[0].text = field['字段名']
                row.cells[1].text = field['数据类型']
                row.cells[2].text = field['是否主键']
                row.cells[3].text = field['是否自增']
                row.cells[4].text = field['允许为空']
                row.cells[5].text = field['注释']
            
            # 设置列宽
            widths = [Cm(2.8), Cm(2.5), Cm(1.8), Cm(1.8), Cm(1.8), Cm(5.5)]
            for row in table.rows:
                for col_idx, cell in enumerate(row.cells):
                    if col_idx < len(widths):
                        cell.width = widths[col_idx]
        else:
            doc.add_paragraph('警告: 该表未解析到字段信息', style='Intense Quote')
        
        # 添加间距
        doc.add_paragraph()
        
        # 如果不是最后一个表，添加分页
        if i < total_tables:
            doc.add_page_break()
    
    # 保存文档
    doc.save(output_path)
    return output_path

def main():
    """主函数"""
    print("="*60)
    print("SQL转Word文档生成器 - 增强版")
    print("="*60)
    
    # 配置
    sql_file = "sql/risk_system_db.sql"  # 修改为您的SQL文件名
    output_file = "doc/risk_system_db.docx"   # 修改为您的WORD文件名
    
    if not os.path.exists(sql_file):
        print(f"错误: 找不到文件 '{sql_file}'")
        print("请确保文件存在于当前目录")
        
        # 显示当前目录文件
        print("\n当前目录文件:")
        files = os.listdir('.')
        sql_files = [f for f in files if f.lower().endswith('.sql')]
        for f in sql_files:
            print(f"  - {f}")
        
        if sql_files:
            print(f"\n建议: 修改代码中的 sql_file 变量为 '{sql_files[0]}'")
        return
    
    print(f"处理SQL文件: {sql_file}")
    
    # 解析SQL文件
    tables = extract_all_tables_from_sql_file(sql_file)
    
    if not tables:
        print("错误: 未解析到任何表结构")
        print("可能原因:")
        print("1. SQL文件格式不正确")
        print("2. 文件编码不是UTF-8")
        print("3. SQL语句格式不标准")
        return
    
    print(f"\n解析完成:")
    print(f"  表总数: {len(tables)}")
    
    total_fields = sum(len(fields) for fields in tables.values())
    print(f"  字段总数: {total_fields}")
    
    # 显示表信息
    print("\n表详情:")
    for i, (table_name, fields) in enumerate(sorted(tables.items()), 1):
        print(f"  {i:2d}. {table_name:30s} - {len(fields):3d} 个字段")
        if len(fields) > 0:
            print(f"      示例字段: {fields[0]['字段名']} ({fields[0]['数据类型']})")
    
    # 生成Word文档
    print(f"\n生成Word文档...")
    output_path = create_word_document_for_tables(tables, output_file)
    
    print(f"✓ 文档已生成: {output_path}")
    print(f"✓ 请查看生成的文档验证字段数量")

if __name__ == "__main__":
    main()

    # 配置，主要修改以下2个地方，在python编辑器或者服务器上面执行。
    sql_file = "sql/risk_system_db.sql"  # 修改为您的SQL文件名
    output_file = "doc/risk_system_db.docx"   # 修改为您的WORD文件名

# 执行命令 - 基本用法
python 文件名.py

# 如果系统中有多个Python版本
python3 文件名.py

posted on 2026-04-07 11:33 oktokeep 阅读(16) 评论(0) 收藏举报