Python导出DB2数据库的表结构(使用SSH隧道)

1、安装依赖

pip install sshtunnel pyodbc  openpyxl  pandas 

2、安装 DB2 ODBC 驱动​

我使用的DB2版本是10.5,下载的ODBC驱动文件如下:

ibm_data_server_driver_package_win64_v10.5.exe

3、完整代码

import pandas as pd
import pyodbc
from openpyxl import load_workbook
from openpyxl.styles import Border, Side, PatternFill, Font, Alignment
from openpyxl.utils import get_column_letter
from sshtunnel import SSHTunnelForwarder


def apply_formatting(excel_path):
    """应用格式设置"""
    wb = load_workbook(excel_path)

    # 定义样式
    blue_fill = PatternFill(
        start_color="0070C0",
        end_color="0070C0",
        fill_type="solid"
    )
    thin_border = Border(
        left=Side(style='thin'),
        right=Side(style='thin'),
        top=Side(style='thin'),
        bottom=Side(style='thin')
    )

    for sheet in wb.worksheets:
        # 设置标题行样式
        for cell in sheet[1]:
            cell.fill = blue_fill
            cell.font = Font(color="FFFFFF", bold=True)

        # 设置边框
        for row in sheet.iter_rows(min_row=1):
            for cell in row:
                cell.border = thin_border

        # 自适应列宽
        for column in sheet.columns:
            max_length = max(
                len(str(cell.value)) for cell in column
            )
            adjusted_width = (max_length + 2) * 1.2
            sheet.column_dimensions[
                get_column_letter(column[0].column)
            ].width = adjusted_width

        # 定义居中对齐样式(水平居中 + 垂直居中)
        alignment = Alignment(
            horizontal='center',  # 水平居中:'left', 'center', 'right', 'justify'
            vertical='center'  # 垂直居中:'top', 'center', 'bottom'
        )
        # 对指定的列设置居中
        columns_to_center = ['A', 'H']     # 将A列和H列设置居中
        for col in columns_to_center:
            for cell in sheet[col]:
                cell.alignment = alignment

    wb.save(excel_path)


def add_hyperlinks(excel_path):
    """添加双向超链接"""
    wb = load_workbook(excel_path)
    ws_summary = wb["首页总览"]

    # 总览表到分表
    for row in ws_summary.iter_rows(min_row=2):
        table_name = row[1].value
        sheet_name = table_name[:30]
        hyperlink = f"#'{sheet_name}'!A1"
        row[1].hyperlink = hyperlink
        row[1].style = "Hyperlink"

    # 分表到总览表
    for sheet_name in wb.sheetnames[1:]:
        ws = wb[sheet_name]
        last_row = ws.max_row + 1
        ws.cell(row=last_row, column=1, value="返回\"首页总览\"")
        ws.cell(row=last_row, column=1).hyperlink = "#'首页总览'!A1"
        ws.cell(row=last_row, column=1).style = "Hyperlink"

    wb.save(excel_path)


def sql_to_dataframe(cursor):
    """将 pyodbc cursor 结果转换为 DataFrame"""
    # 获取列名
    columns = [column[0] for column in cursor.description]
    # 获取数据并创建DataFrame
    data = cursor.fetchall()
    return pd.DataFrame.from_records(data, columns=columns)


def get_tables_info(cursor, db_user):
    """获取所有表信息"""
    query = f"""
            SELECT 
                TABNAME  AS  table_name,
                 REMARKS AS  table_comment
            FROM SYSCAT.TABLES 
            WHERE  TYPE = 'T'  AND TABSCHEMA = '{db_user}'
            ORDER BY TABNAME
    """
    # 执行查询
    cursor.execute(query)
    return sql_to_dataframe(cursor)


def get_columns_info(cursor, table_name):
    """获取指定表结构详情"""
    query = f"""
            SELECT
                T.TABNAME  AS table_name,
                C.COLNAME  AS column_name,
                C.REMARKS   AS column_comment,
                C.TYPENAME  AS column_type,
                C.LENGTH    AS  type_length,
                NULLIF(C.SCALE,0)   AS  type_scale,
                CASE 
                    WHEN C.NULLS = 'N' THEN '[√]' 
                    ELSE '[]' 
                END              AS  isnotnull
            FROM
                SYSCAT.TABLES T
            JOIN
                SYSCAT.COLUMNS C
                ON T.TABSCHEMA = C.TABSCHEMA 
                AND T.TABNAME = C.TABNAME
            WHERE
                T.TYPE = 'T' 
                AND T.TABSCHEMA NOT IN ('SYSIBM', 'SYSCAT', 'SYSSTAT')  
               AND T.TABNAME = '{table_name}' 
            ORDER BY
                T.TABNAME,
                C.COLNO;  
    """

    # 执行查询
    cursor.execute(query)
    return sql_to_dataframe(cursor)


if __name__ == '__main__':

    # ================== 配置信息 ==================
    # SSH 隧道配置
    ssh_host = "100.100.20.20"  # SSH跳板机IP
    ssh_port = 22               # SSH端口(默认22)
    ssh_user = "root"           # SSH用户名
    ssh_password = "Root@1234"  # SSH密码

    # DB2数据库参数
    db_host = "100.100.10.10"  # 数据库服务器实际IP(SSH隧道目标)
    db_port = 50000           # DB2 默认端口
    db_name = "MYDB"          # DB2 数据库
    db_user = "db2user"       # DB2 用户
    db_password = "DB2@user"  # DB2 密码

    # 本地绑定端口(随机可用端口)
    local_bind_port = 9090

    # ================== 建立SSH隧道 ==================
    try:
        with SSHTunnelForwarder(
                (ssh_host, ssh_port),
                ssh_username=ssh_user,
                ssh_password=ssh_password,
                remote_bind_address=(db_host, db_port),
                local_bind_address=("0.0.0.0", local_bind_port),
                set_keepalive=15         # 保持连接活跃
        ) as tunnel:
            local_bind_port2 = tunnel.local_bind_port
            print(f"SSH隧道建立成功,本地端口:{local_bind_port2}")

            # ================== 连接数据库 ==================
            # 方法一:使用 pyodbc
            # 连接字符串(注意使用本地端口)
            conn_str = (
                "DRIVER={IBM DB2 ODBC DRIVER};"  # 驱动名称需与ODBC配置一致
                f"DATABASE={db_name};"
                "HOSTNAME=localhost;"
                f"PORT={local_bind_port2};"
                "PROTOCOL=TCPIP;"
                f"UID={db_user};"
                f"PWD={db_password};"
            )
            conn = pyodbc.connect(conn_str)
            cursor = conn.cursor()
            print("数据库连接成功!")

            # 获取所有表的信息
            df_summary = get_tables_info(cursor, db_user.upper())
            df_summary.insert(0, '序号', range(1, len(df_summary) + 1))

            # 写入的Excel名称
            output_file = f"数据库{db_name}表结构.xlsx"

            # 创建Excel写入对象
            with pd.ExcelWriter(output_file, engine='openpyxl') as writer:
                # 写入总览页
                df_summary.to_excel(
                    writer,
                    sheet_name='首页总览',
                    index=False,
                    header=['序号', '表名', '表注释']
                )

                # 遍历写入各表结构
                for _, row in df_summary.iterrows():
                    table_name = row['TABLE_NAME']

                    print("正在导出表", table_name, "的表结构")

                    df_columns = get_columns_info(cursor, table_name)
                    df_columns.insert(0, '序号', range(1, len(df_columns) + 1))
                    df_columns.to_excel(
                        writer,
                        sheet_name=table_name[:30],  # Excel表名最长31字符
                        index=False,
                        header=["序号", "表名", "字段名称", "字段注释", "字段类型", "长度", "标度", "非空"]
                    )

            # 应用格式和超链接
            print("正在设置超链接和边框......")
            add_hyperlinks(output_file)    # 设置超链接
            apply_formatting(output_file)   # 设置边框

            print(f"表结构已成功导出至 {output_file}")

    except Exception as e:
        print(f"连接失败: {str(e)}")
    finally:
        if 'conn' in locals():
            conn.close()

 

posted @ 2025-05-20 16:20  业余砖家  阅读(27)  评论(0)    收藏  举报