oracle、mysql批量、大量数据导入工具
最近经常oracle数据库或mysql数据库的大量数据导入工作,所以利用python编写了简单小巧人一个快速导入的工具.话不多说直接上代码.
1、配置文件
################生产配置################
[sync] #original:1-ORACLE|2-MYSQL|3-SQL|4-CSV (数据来源,) original = 4 #destination:1-ORACLE|2-MYSQL (目标导入数据库类型) destination = 2 #批量导入每次的条数 rowevery = 3000 #表名列表用逗号分隔,SQL或CSV文件类型时配置为: 表名.sql 或 表名.csv tables = TABLE1,TABLE2 [mysql] host=127.0.0.1 port=3306 database=data_test user=admin password=123456 [oracle] host=192.168.0.21 port=1521 database=data_test user=admin password=123456
2、主程序
################################################################################################################################# #功能:数据库导入工具,支持oracle,mysql,sql文件,csv文件导入到oracle和mysql目标数据库类型 #特点:部署简单python2.7以上版本,工具小巧,导入速度快(表10字段内5秒10万+,表30字段1分钟内10万+) #环境:python2.7以上版本 #依赖:pymysql,cx_Oracle #时间:2021年5月10日 #编制:娄来友 ################################################################################################################################# #coding=utf-8 import os import sys #import pdb #添加模块路径 #sys.path.append(sys.path[0] + '/inc') sys.path.append('../inc') import pymysql import cx_Oracle import ConfigParser import time import datetime import re import signal import threading g_flagRun = True g_arrThread = [] g_lstImport = [] g_lstLoadTables = [] con_ini = 'cfg_SyncDatabase.ini' parser = ConfigParser.ConfigParser() parser.read(con_ini) #处理程序退出信号 def sigCHandler(signum, frame): print("Input exit signal!") #current_time = time.time() global g_RunFlag g_RunFlag = False for thr in g_arrThread : thr.join() #线程阻塞,等待至线程中止 print(datetime.datetime.now().strftime('%Y%m%d%H%M%S:') + '应用程序退出;') sys.exit(0) #线程管理 def thdJoin(threads): #令主线程阻塞,等待子线程执行完才继续,使用这个方法比使用join的好处是,可以ctrl+c kill掉进程 for thr in threads: while True: if thr.isAlive(): time.sleep(1) else: break #连接MYSQL数据库 def dbconnectMySql(param): param['port'] = int(param['port']) conn = None try: conn = pymysql.Connect(**dict(param)) except Exception as e: print(e) return conn #连接ORACLE数据库 def dbconnectOracle(): conn = None try: dsn = dict(parser.items('oracle'))['user'] + '/' + dict(parser.items('oracle'))['password'] + '@' + dict(parser.items('oracle'))['host'] + '/' + dict(parser.items('oracle'))['database'] conn = cx_Oracle.connect(dsn) except Exception as e: print(e) return conn #判断变量类型的函数 def getVariateType(tVariate): type='None' if isinstance(tVariate,int): type = "INT" elif isinstance(tVariate,str): type = "STRING" elif isinstance(tVariate,float): type = "FLOAT" elif isinstance(tVariate,list): type = "LIST" elif isinstance(tVariate,tuple): type = "TUPLE" elif isinstance(tVariate,dict): type = "DICT" elif isinstance(tVariate,set): type = "SET" elif isinstance(tVariate,datetime.datetime): type = "DATETIME" return type #导入表定义 class ST_IMPORT: def __init__(self): self.name = '' #表名 self.sqlTemplet = '' #sql模板字符串 self.sqlArgs = [] #模板字符串的参数 self.count = 0 #导入数量 #运行程序 def start(): print(datetime.datetime.now().strftime('%Y%m%d%H%M%S:') + '应用程序启动;') # # for tName in lstTablesName: # st = ST_LOAD_TABLE() # st.name = tName # st.sqlTemplet = '' # g_lstLoadTables.append(st) #添加线程 g_arrThread.append(threading.Thread(target=thdReadDeal)) g_arrThread.append(threading.Thread(target=thdWriteDeal)) #线程启动 for thr in g_arrThread : thr.setDaemon(True) #设置此线程是否被主线程守护回收。默认False不回收,需要在 start 方法前调用;设为True相当于像主线程中注册守护,主线程结束时会将其一并回收 thr.start() #启动线程 #捕获退出信号的方式退出程序 for sig in [signal.SIGINT, # 键盘中 Ctrl-C 组合键信号 signal.SIGHUP, # 发送给具有Terminal的Controlling Process,当terminal 被disconnect时候发送 signal.SIGTERM # 命令行数据 kill pid 时的信号 ]: signal.signal(sig, sigCHandler) thdJoin(g_arrThread) #自定义线程阻塞管理 thd.join() #线程阻塞,等待至线程中止 #导入语句生成线程 def thdReadDeal(): print(datetime.datetime.now().strftime('%Y%m%d%H%M%S:') + '导入语句生成线程启动;') global g_flagRun iSyncOriginal = int(dict(parser.items('sync'))['original']) #数据源 dbOrig = None if iSyncOriginal == 1: dbOrig = dbconnectOracle() elif iSyncOriginal == 2: dbOrig = dbconnectMySql(dict(parser.items('mysql'))) elif iSyncOriginal != 3 and iSyncOriginal != 4 : print(datetime.datetime.now().strftime('%Y%m%d%H%M%S:') + '未定义的数据来源,导入语句生成线程结束;') g_flagRun = False return #_______________________________从数据库导入_________________________________________________________________________________________________________________________Beg if iSyncOriginal in [1,2] and dbOrig != None: lstTablesName = (dict(parser.items('sync'))['tables']).split(",") #从配置文件中取出需导入的表名 nRowEvery = int(dict(parser.items('sync'))['rowevery']) #从配置文件中取出每次提交数量 for tTablesName in lstTablesName : #进程关闭线程退出 if not g_flagRun: break #等待前表完成 while len(g_lstImport) > 0 and tTablesName[0] != tTablesName and g_flagRun: time.sleep(1) strSqlTemplet = '' #sql模板字符串 tFieldName = '' #拼接字段名称 tFieldVal = '' #接接字段值 try : #获取操作游标 csOrig = None csOrig = dbOrig.cursor() strSqlOrig = 'select * from ' + tTablesName print(datetime.datetime.now().strftime('%Y%m%d%H%M%S:')+'查询原表数据('+strSqlOrig+');') hRet = csOrig.execute(strSqlOrig) if hRet == None: break g_lstImport.append(tTablesName)#开始导入将表名插入列表(第一条) lstSqlArgs = [] #导入数据参数列表 while True : row = csOrig.fetchone() if row == None or not g_flagRun: if len(lstSqlArgs) > 0 and g_flagRun: g_lstImport.append(lstSqlArgs) lstSqlArgs = [] g_lstImport.append('END') #结束本次导入作业(最后一条) break #生成导入语句 if strSqlTemplet == '': #首次时需要拼接字段列表 for desc in csOrig.description : if tFieldName == '' : tFieldName = desc[0] tFieldVal = '%s' else : tFieldName = tFieldName + ',' + desc[0] tFieldVal = tFieldVal + ',' + '%s' tFieldName = '('+ tFieldName + ')' tFieldVal = '('+ tFieldVal + ')' #拼接导入语句模板 strSqlTemplet = "insert into "+ tTablesName + tFieldName + " values " + tFieldVal g_lstImport.append(strSqlTemplet) #将模板加入列表(第二条) lstSqlArgs.append(row) #将数据加入到列表中 if len(lstSqlArgs) >= nRowEvery : g_lstImport.append(lstSqlArgs) lstSqlArgs = [] while len(g_lstImport) > 10 and g_flagRun: time.sleep(1) csOrig.close() except Exception as e: print(datetime.datetime.now().strftime('%Y%m%d%H%M%S:')+str(e)+';[-1]') g_flagRun = False dbOrig.close() #_______________________________数据库导入_________________________________________________________________________________________________________________________End #pdb.set_trace() #_______________________________从SQL文件导入______________________________________________________________________________________________________________________Beg if iSyncOriginal == 3: lstTablesName = (dict(parser.items('sync'))['tables']).split(",") #从配置文件中取出需导入的表名 nRowEvery = int(dict(parser.items('sync'))['rowevery']) #从配置文件中取出每次提交数量 for tTablesName in lstTablesName : #进程关闭线程退出 if not g_flagRun: break #等待前表完成 while len(g_lstImport) > 0 and tTablesName[0] != tTablesName and g_flagRun: time.sleep(1) try : hSqlFile = open(tTablesName+'.sql', 'rb') #打开文件rb方式最快 g_lstImport.append(tTablesName) #开始导入将表名插入列表(第一条) g_lstImport.append('SQL-FILE') #将模板加入列表(第二条) lstSqlArgs = [] #导入数据参数列表 for tLine in hSqlFile: if not g_flagRun: g_lstImport.append('END') #结束本次导入作业(最后一条) break if tLine.strip() == '': continue lstSqlArgs.append(tLine) if len(lstSqlArgs) >= nRowEvery : g_lstImport.append(lstSqlArgs) lstSqlArgs = [] while len(g_lstImport) > 10 and g_flagRun: time.sleep(1) except Exception as e: print(datetime.datetime.now().strftime('%Y%m%d%H%M%S:')+str(e)+';[-1]') g_flagRun = False finally: if len(lstSqlArgs) > 0 and g_flagRun: g_lstImport.append(lstSqlArgs) lstSqlArgs = [] g_lstImport.append('END') #结束本次导入作业(最后一条) hSqlFile.close() #_______________________________从SQL文件导入______________________________________________________________________________________________________________________End #pdb.set_trace() #_______________________________从CSV文件导入______________________________________________________________________________________________________________________Beg if iSyncOriginal == 4: lstTablesName = (dict(parser.items('sync'))['tables']).split(",") #从配置文件中取出需导入的表名 nRowEvery = int(dict(parser.items('sync'))['rowevery']) #从配置文件中取出每次提交数量 for tTablesName in lstTablesName : #进程关闭线程退出 if not g_flagRun: break #等待前表完成 while len(g_lstImport) > 0 and tTablesName[0] != tTablesName and g_flagRun: time.sleep(1) strSqlTemplet = '' #sql模板字符串 tFieldName = '' #拼接字段名称 tFieldVal = '' #接接字段值 try : hCsvFile = open(tTablesName+'.csv', 'rb') #打开文件rb方式最快 g_lstImport.append(tTablesName) #开始导入将表名插入列表(第一条) lstSqlArgs = [] #导入数据参数列表 for tLine in hCsvFile: if not g_flagRun: g_lstImport.append('END') #结束本次导入作业(最后一条) break tLine=[eval(t) for t in tLine.split(',')] #生成导入语句 if strSqlTemplet == '': #首次时需要拼接字段列表 for desc in tLine: if tFieldName == '' : tFieldName = desc tFieldVal = '%s' else : tFieldName = tFieldName + ',' + desc tFieldVal = tFieldVal + ',' + '%s' tFieldName = '('+ tFieldName + ')' tFieldVal = '('+ tFieldVal + ')' #拼接导入语句模板 strSqlTemplet = "insert into "+ tTablesName + tFieldName + " values " + tFieldVal g_lstImport.append(strSqlTemplet) #将模板加入列表(第二条) continue lstSqlArgs.append(tuple(tLine)) #转成元组 if len(lstSqlArgs) >= nRowEvery : g_lstImport.append(lstSqlArgs) lstSqlArgs = [] while len(g_lstImport) > 10 and g_flagRun: time.sleep(1) except Exception as e: print(datetime.datetime.now().strftime('%Y%m%d%H%M%S:')+str(e)+';[-1]') g_flagRun = False finally: if len(lstSqlArgs) > 0 and g_flagRun: g_lstImport.append(lstSqlArgs) lstSqlArgs = [] g_lstImport.append('END') #结束本次导入作业(最后一条) hCsvFile.close() #_______________________________从CSV文件导入______________________________________________________________________________________________________________________End g_lstImport.append('EXIT') print(datetime.datetime.now().strftime('%Y%m%d%H%M%S:') + '导入语句生成线程结束;') #导入语句执行线程 def thdWriteDeal(): print(datetime.datetime.now().strftime('%Y%m%d%H%M%S:') + '导入语句执行线程启动;') global g_flagRun iSyncDestination = int(dict(parser.items('sync'))['destination']) #从配置文件中取出目标数据库 lstTablesName = (dict(parser.items('sync'))['tables']).split(",") #从配置文件中取出导入的表名 dbDest = None if iSyncDestination == 1: dbDest = dbconnectOracle() elif iSyncDestination == 2: dbDest = dbconnectMySql(dict(parser.items('mysql'))) else : print(datetime.datetime.now().strftime('%Y%m%d%H%M%S:') + '未定义的数据目的;') g_flagRun = False return tablesName = "" #表名 sqlTemplet = '' #sql模板字符串 tmBeg = None tmEnd = None nTimes = 0 nRowTotal = 0 try : csDest = None csDest = dbDest.cursor() while True :#循环列表 if len(g_lstImport) <= 0: if not g_flagRun:#线程结束 break; time.sleep(1) continue tImport = g_lstImport[0] del g_lstImport[0] tImportType = getVariateType(tImport) if tImportType == 'STRING': if tImport in lstTablesName : tablesName = tImport #表名 tmBeg = datetime.datetime.now() nTimes = 0 nRowTotal = 0 continue elif tImport == "END" : #最后余量提交 tmEnd = datetime.datetime.now() if tmBeg != None and tmEnd !=None: print(datetime.datetime.now().strftime('%Y%m%d%H%M%S:<<')+tablesName+'>导入完成,共导入'+str(nRowTotal)+'条,总用时'+str((tmEnd-tmBeg).seconds)+'秒;') else: print(datetime.datetime.now().strftime('%Y%m%d%H%M%S:<<')+tablesName+'>>无有效的导入内容;') tablesName = '' nTimes = 0 nRowTotal = 0 tmBeg = None tmEnd = None continue elif tImport == "EXIT" : break elif tablesName !='' and nTimes == 0: sqlTemplet = tImport continue elif tImportType != 'LIST': print(datetime.datetime.now().strftime('%Y%m%d%H%M%S:<<')+tablesName+'>>导入列表异常;') g_flagRun = False break #print(datetime.datetime.now().strftime('%Y%m%d%H%M%S:')+'执行语句('+strSqlIns+');') #使用批量提交 #pdb.set_trace() if sqlTemplet == 'SQL-FILE': for tSql in tImport: hRet = csDest.execute(tSql) if hRet == None: break else: hRet = csDest.executemany(sqlTemplet,tImport) if hRet == None: print(datetime.datetime.now().strftime('%Y%m%d%H%M%S:<<')+tablesName+'>>执行异常;') g_flagRun = False break dbDest.commit() nRowCount = len(tImport) nRowTotal = nRowTotal + nRowCount nTimes = nTimes + 1 print(datetime.datetime.now().strftime('%Y%m%d%H%M%S:')+"<<%s>>第%d次导入,本次导入%d条,共导入%d条;"%(tablesName,nTimes,nRowCount,nRowTotal) ) except Exception as e: print(datetime.datetime.now().strftime('%Y%m%d%H%M%S:')+str(e)+';[-1]') g_flagRun = False print(datetime.datetime.now().strftime('%Y%m%d%H%M%S:') + '导入语句执行线程结束;') if __name__ == '__main__': start()
3、运行脚本
#!/bin/sh source ~/.bash_profile cd /app/plca/plca/PlScript/SyncDatabase/ nohup python -u SyncDatabase.py >>SyncDatabase.log 2>&1 &


浙公网安备 33010602011771号