python 数据库数据较验

# -*- coding: utf-8 -*-
# @Time   : 2023-09-07
# @Author : Carl_DJ

"""
实现功能:
    1、python直接链接SqlServer数据库可以MySQL数据库,进行数据对比
    2、对比mysql和SqlServer两个数据库的表名以及表内数据的差异:
    3、对比的差异分别输出到文件中
        ---->数据差异,存到 .xlsx文件
        ---->表的差异,存到 .log文件
    4、因为MC表及数据较多,使用多线程进行并发比较数据,提升效率
    5、如果对比其他环境的数据,只需换更换 数据库信息即可
    6、推荐使用pymssql、pymysql 链接数据库
"""
import concurrent.futures
import mysql.connector
import pyodbc
import  pymssql
import pymysql
import time
import pandas as pd
import threading
from openpyxl import Workbook
from openpyxl.utils.dataframe import dataframe_to_rows

'''
使用mysql.connector 链接 mysql,
使用pyodbc 链接 SqlServer

'''
# MySQL 数据库连接配置
# mysql_conn = mysql.connector.connect(
#                     host='localhost',  # MySQL主机地址
#                    user='username',  # MySQL用户名
#                    password='password',  # MySQL密码
#                    database='DBname',  # 需要对比的数据库名
#                     )
#
#
# # SQL Server 数据库连接配置
# sql_server_conn = pyodbc.connect(
#     r'DRIVER={SQL Server};SERVER=server_name;DATABASE=dbname;UID=username;PWD=password;DATABASE=DBname')


'''
使用 pymysql 链接 mysql数据库
使用pymssql 链接 SqlServer数据库
'''

# MySQL 数据库连接配置
mysql_conn = pymysql.connect(host="localhost", 
                            port=port, user="username", 
                            password="password",
                            db="DBname"
                            )
if mysql_conn:
    print("mysql数据库链接成功")
time.sleep(1)

# 链接SqlServer
sql_server_conn = pymssql.connect(host="localhost", 
                                  port="port",
                                  user="username", 
                                  password="password",
                                  database="DBname"
                                )
if sql_server_conn:
    print("sqlserver数据库链接成功")
time.sleep(1)

#时间
now = time.strftime("%Y-%m-%d_%H%M%S",time.localtime())
print(f'开始执行差异对比:{now}')

# 获取MySQL数据库中所有表名(忽略表名大小写)
def get_mysql_table_names():
    cursor = mysql_conn.cursor()
    cursor.execute("SHOW TABLES")
    tables = [table[0].lower() for table in cursor.fetchall()]
    cursor.close()
    return tables

# 获取SQL Server数据库中所有表名(忽略表名大小写)
def get_sql_server_table_names():
    cursor = sql_server_conn.cursor()
    cursor.execute("SELECT table_name FROM information_schema.tables WHERE table_type='BASE TABLE'")
    tables = [table[0].lower() for table in cursor.fetchall()]
    cursor.close()
    return tables

# 比较两个数据库中指定表的差异数据,并将结果存储到Excel表格中
def compare_tables(table_name):
    try:
        # 从MySQL数据库中获取表数据
        mysql_cursor = mysql_conn.cursor()
        mysql_cursor.execute(f"SELECT * FROM {table_name}")
        mysql_data = mysql_cursor.fetchall()
        mysql_columns = [column[0] for column in mysql_cursor.description]

        # 从SQL Server数据库中获取表数据
        sql_server_cursor = sql_server_conn.cursor()
        sql_server_cursor.execute(f"SELECT * FROM {table_name}")
        sql_server_data = sql_server_cursor.fetchall()
        sql_server_columns = [column[0] for column in sql_server_cursor.description]

        # 将数据转换为pandas DataFrame进行比较
        mysql_df = pd.DataFrame(mysql_data, columns=mysql_columns)
        sql_server_df = pd.DataFrame(sql_server_data, columns=sql_server_columns)
        diff = mysql_df.compare(sql_server_df)

        # 如果存在差异,将差异数据存储到Excel表格中
        if not diff.empty:
            diff_data_name = ('different_data' + now + '.xlsx')
            with pd.ExcelWriter(diff_data_name) as writer:
                diff.to_excel(writer, sheet_name=table_name)
                print(f"表 {table_name}存在差异,并存在{diff_data_name}文件中")
    except Exception as e:
        print(f"比较表 {table_name}出错,错误原因: {str(e)}")
        pass

# 获取MySQL和SQL Server数据库中所有不同的表名,并将它们写入日志文件
def get_different_tables():
    mysql_tables = get_mysql_table_names()
    sql_server_tables = get_sql_server_table_names()
    different_tables = set(mysql_tables).symmetric_difference(set(sql_server_tables))
    different_tables_log = ('different_tables' + now + '.log')
    with open(different_tables_log, 'w') as f:
        for table in different_tables:
            f.write(f"Different table: {table}" + '\n')
            print(f"Different table: {table}")
    return different_tables

# 使用多线程进行数据校验
def thread_worker(table):
    compare_tables(table)

# 运行
if __name__ == '__main__':
    # 获取MySQL和SQL Server数据库中所有不同的表名,并将它们写入日志文件
    different_tables = get_different_tables()
    # 创建线程列表
    
    # 创建线程池,最多使用5个线程
    with concurrent.futures.ThreadPoolExecutor(max_workers=5) as executor:
        # 启动多线程进行数据校验
        executor.map(thread_worker, different_tables)
        
        
    """    该场景不一样 
    threads = []
    # 启动多线程进行数据校验
    for table in different_tables:
        thread = threading.Thread(target=thread_worker, args=(table,))
        threads.append(thread)
        thread.start()

    # 等待所有线程完成
    for thread in threads:
        thread.join()
    """        
        
        
    # 关闭数据库连接 
    mysql_conn.close()
    sql_server_conn.close()
    
    print("Different tables:")
    print(list(different_tables))
    

 PS :https://blog.csdn.net/wuyoudeyuer/article/details/132753669 

posted @ 2024-02-21 18:14  yongqi-911  阅读(11)  评论(0编辑  收藏  举报