# -*- coding: utf-8 -*-
import os
import pandas as pd
def split_csv(src_file_name: str, num: int, flag: int, file_encoding: str,
              src_encoding: str = 'gbk') -> None:
    """Split a CSV file into several smaller CSV files.

    The whole source file is loaded with ``pandas.read_csv`` and consecutive
    row slices are written back out with ``DataFrame.to_csv(index=False)``.
    Each output path is built from the source path as
    ``<stem><i><file_encoding><extension>`` where ``i`` starts at 1, so the
    parts land alongside the source file.

    Args:
        src_file_name: Path of the CSV file to split.
        num: A positive integer whose meaning depends on ``flag``.
        flag: ``1`` means ``num`` is the number of output files; any other
            value (normally ``0``) means ``num`` is the number of data rows
            per output file.
        file_encoding: Encoding used when writing the output files; it is
            also embedded in every output file name.
        src_encoding: Encoding used when reading the source file. Defaults
            to ``'gbk'``, the value that used to be hard-coded.

    Raises:
        ValueError: If ``num`` is not positive.

    Note:
        When ``flag`` is ``1`` the rows per file is ``ceil(total / num)`` and
        exactly ``num`` files are written, so trailing files may contain
        only the header row when the rows run out early.
    """
    # Reject bad sizes up front: zero used to surface as a ZeroDivisionError
    # and a negative value silently produced no output at all.
    if num <= 0:
        raise ValueError(f"num must be a positive integer, got {num!r}")

    df = pd.read_csv(src_file_name, encoding=src_encoding)
    total_rows = df.shape[0]

    # Integer ceiling of total_rows / num; avoids the float round trip.
    quotient = (total_rows + num - 1) // num

    if flag == 1:
        # num is the file count, so the quotient is the chunk size.
        rows_per_file, file_count = quotient, num
    else:
        # num is the chunk size, so the quotient is the file count.
        rows_per_file, file_count = num, quotient
    print(rows_per_file, file_count, total_rows)

    # Separate the stem from the extension so the index can be inserted.
    f_name, f_extension = os.path.splitext(src_file_name)
    for i in range(1, file_count + 1):
        start = (i - 1) * rows_per_file
        df.iloc[start:start + rows_per_file].to_csv(
            f"{f_name}{i}{file_encoding}{f_extension}",
            index=False,
            encoding=file_encoding)
        # Progress indicator: index of the file just written.
        print(i)
if __name__ == '__main__':
    # How to interpret num: 0 -> rows per output file, 1 -> number of files.
    flag = 0
    # Size setting for the split; with flag 0 this is rows per output file.
    num = 10000
    # Encoding of the generated files.
    file_encoding = 'utf-8'
    # Name of the raw data file to split.
    src_file_name = '银行交易流水.txt'

    # Run the split with the settings above.
    split_csv(
        src_file_name=src_file_name,
        num=num,
        flag=flag,
        file_encoding=file_encoding,
    )