使用python插入5000w数据到mysql

应用场景,数据库数据达到千万以后,做数据分析执行脚本开始变慢,所以用到clickhouse,先插入5000w数据测试

先安装pip支持mysql.connector,我安装的python3.11
pip3 install mysql-connector-python
再使用pip3 list查看是否安装成功

以下是mysql创建表语句

CREATE TABLE `test_table` (
`id` int(11) unsigned NOT NULL AUTO_INCREMENT,
`user_id` int(11) DEFAULT NULL,
`recharge_amount` int(11) DEFAULT NULL,
`bonus` decimal(12,2) DEFAULT NULL,
`message` varchar(255) DEFAULT NULL,
`content` varchar(255) DEFAULT NULL,
`remark` varchar(255) DEFAULT NULL,
`order_id` varchar(32) DEFAULT NULL,
`create_time` timestamp NULL DEFAULT CURRENT_TIMESTAMP,
`update_time` timestamp NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
PRIMARY KEY (`id`)
) ENGINE=InnoDB AUTO_INCREMENT=50000001 DEFAULT CHARSET=utf8;

python代码如下:

import mysql.connector
import random
import string
import datetime

# MySQL 连接信息
config = {
    'user': 'root',
    'password': 'root',
    'host': '127.0.0.1',
    'database': 'db1',
}

# 生成模拟数据
def generate_data(batch_size=10000):
    for i in range(batch_size):
        # 生成模拟数据
        yield [random_string(32),random_string(16),random_string(64),getTimeStr(),random.randint(1000000, 9999999)]

#生成随机字符串
def random_string(length):
    letters = string.ascii_lowercase + string.digits
    return ''.join(random.choice(letters) for i in range(length))

def getTimeStr():
    # 获取当前日期和时间
    now = datetime.datetime.now()
    # 格式化为YmdHis格式
    formatted_time = now.strftime("%Y%m%d%H%M%S")
    # 将日期和时间转换为整数
    timestamp = int(now.timestamp())
    return formatted_time+str(timestamp)


def insertData():
    connection = mysql.connector.connect(**config)
    cursor = connection.cursor()
    # 数据总量
    total = 50000000

    # 每批次插入的数据量
    batch_size = 100000

    # 循环插入数据
    for i in range(0, total, batch_size):
        # 准备SQL语句,插入一批数据到表格中
        sql = "INSERT INTO test_table (message, content, remark, order_id,user_id,recharge_amount,bonus) VALUES "
        values = []
        for j in range(batch_size):
            values.append((random_string(32),random_string(16),random_string(64),getTimeStr(),random.randint(1000000, 9999999),random.randint(1, 10000),random.uniform(1.0, 100.0)))
        sql += ", ".join(["(%s, %s,%s,%s,%s,%s,%s)"] * len(values))
        val = [item for sublist in values for item in sublist]
        cursor.execute(sql, val)

        # 提交事务
        connection.commit()

        # 输出当前进度信息
        print("{:.2f}% 完成".format((i + batch_size) / total * 100))

    print(cursor.rowcount, "记录插入成功。")

if __name__ == '__main__':
    insertData()
posted @ 2023-06-10 17:41  八戒vs  阅读(67)  评论(0编辑  收藏  举报