#!/usr/bin/python
#Filename :smelter_colmerge.py
# -*- coding:utf-8 -*-
import cx_Oracle
import sys
import getopt,string
import re
import os
import datetime
import os.path
import configuration
from Common import *
class kk(object):
def __init__(self):
pass
def conn_db(self):
try:
job_config=configuration.configuration()
user_name=job_config.loadConfigFile('config_test.xml','user_name')
user_password=job_config.loadConfigFile('config_test.xml','user_password')
dns=job_config.loadConfigFile('config_test.xml','dns')
db=cx_Oracle.connect(user_name,user_password,dns)
return db
except Exception as e:
print (-1,e)
def c_process_id(self,pid):
db=self.conn_db()
cursor=db.cursor()
pro_pid=cursor.var(cx_Oracle.NUMBER)
cursor.callproc("common.process_id",[pid,pro_pid])
return pro_pid.getvalue()
db.close()
def prhelp(self):
print ''' Usage: smelter_colmerge.py [options]
Options:
-h, --help show this help message and exit
-i, --interid input file_id with a comma segmentation,can more values input
-n, --processid input process id
-o, --targetdir input targetdir
-b, --before assign to type code
-t, --tableid assign to table id
Example: python smelter_colmerge.py -n 100008111 -o /hadoop/pigtmp/yu -i 'I10173227','I10173228'
or python smelter_colmerge.py -n 100008111 -o /hadoop/pigtmp/yu -b FILE_COLLECT -t 26
'''
#Parameters processing
def fun_param_input(self):
TYPCD=0
INTERID=0
try:
if len(sys.argv) > 6:
opts, args = getopt.getopt(sys.argv[1:],"i:n:o:b:t:h", ["interid=","processid=","targetdir=","before=","tableid=","help"])
#print(sys.argv)
#print(opts)
#print(args)
for a,o in opts:
if a in ('-i', '--interid'):
INTERID=o
#print INTERID
elif a in ('-n','--processid'):
PROCESSID=o
#print PROCESSID
elif a in ('-o','--targetdir'):
targetdir=o
#print targetdir
elif a in ('-b','--before'):
TYPCD=o
#print o
elif a in ('-t','--tableid'):
table_id=o
#print o
elif a in ('-h', '--help'):
self.prhelp()
sys.exit()
if os.path.exists(targetdir):
if INTERID==0:
#print PROCESSID,targetdir,TYPCD,0,table_id
return PROCESSID,targetdir,TYPCD,0,table_id
else:
dd=INTERID.split(',')
#print '###################'
#print dd
#print '###################'
#dd=re.split(',',INTERID)
#print PROCESSID,targetdir,0,dd,0
return PROCESSID,targetdir,0,dd,0
else:
meg='%s not exists'%target_directory
print (-1,meg)
else:
print 'Please input right parameters'
self.prhelp()
sys.exit()
except Exception as e:
return (-1,e)
# find file
def fun_find_file(self):
PROCESSID,targetdir,bftype,f_list,table_id=self.fun_param_input()
#print f_list
process_id=self.c_process_id(PROCESSID)
db=self.conn_db()
cursor=db.cursor()
dm_t=myexec_type()
tp_id=dm_t.code_to_id('FILE_MERGE')
if bftype==0:
i=0
j=0
source_file=[]
if len(f_list)<=1:
meg='please input two or more'
print (-1,meg)
return 0,0,0,0,0,0
else:
mg=[]
while i< len(f_list):
#print tp_id
cursor.execute('select distinct if_file_name,table_id,file_id,file_type,current_directory,data_date,hour_id from log_file where file_id=:fl_id and status=\'S\'and type_id <>:tp_id',fl_id=f_list[i],tp_id=tp_id)
a=cursor.fetchall()
if len(a)==1:
while j <= i:
source_file.append(a[0])
j+=1
else:
meg='%s is wrong parameter'%f_list[i]
mg.append(meg)
i+=1
if len(mg) > 1:
print (-1,mg)
if len(source_file)==len(f_list):
#print '$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$'
#print i,source_file,tp_id,targetdir,process_id,source_file[0][1]
return i,source_file,tp_id,targetdir,process_id,source_file[0][1]
else:
return 0,0,0,0,0,0
else:
m=0
source_file=[]
bf_id =dm_t.code_to_id(bftype)
#print bf_id
cursor.execute('select max(process_id) from log_file where type_id=:tpp_id and table_id=:tbb_id and status=\'S\'',tpp_id=bf_id,tbb_id=table_id)
c=cursor.fetchall()
pcsid=c[0][0]
cursor.execute('select distinct if_file_name, table_id,file_id,file_type,current_directory,data_date,hour_id from log_file where status=\'S\' and process_id=:pdd and table_id=:tbb_id and type_id <>:tp_id',pdd=pcsid,tbb_id=table_id,tp_id=tp_id)
a=cursor.fetchall()
if len(a)>=2:
while m<len(a):
source_file.append(a[m])
m+=1
#print "############################################################"
#print m,source_file,tp_id,targetdir,process_id,table_id
return m,source_file,tp_id,targetdir,process_id,table_id
else:
meg='File number less than two'
print (-1,meg)
return 0,0,0,0,0,0
db.close()
# search if exit the merge file
def fun_targetdir(self):
f_num,source_file,tp_id,targetdir,process_id,table_id=self.fun_find_file()
if f_num == 0:
return 0,0,0,0,0,0
else:
newname = 'merge'+source_file[0][0]
targetname = os.path.join(targetdir,newname)
if os.path.exists(targetname):
meg='%s already exits'%targetname
print (-1,meg)
return 0,0,0,0,0,0
else:
#print f_num,source_file,tp_id,targetdir,process_id,table_id
return f_num,source_file,tp_id,targetdir,process_id,table_id
# Judgment field is consistent
def fun_colnum(self):
try:
f_num,source_file,tp_id,targetdir,process_id,table_id=self.fun_targetdir()
db=self.conn_db()
cursor=db.cursor()
dm_t=myexec_type()
vf_id=dm_t.code_to_id('IF_FILE_VERIFY_MASK')
cursor.execute('select fields_terminated_by from dm_if_table where table_id=:tbl_id',tbl_id=table_id)
d=cursor.fetchall()
f_termin=d[0][0]
f_termin=f_termin.replace('\'',"")
f_termin=f_termin.replace("\"",'')
cursor.execute('select max(position_end),max(column_seq) from (select position_start,column_seq,position_end from dm_if_column where table_id=:tbl_id order by 2)',tbl_id=table_id)
c=cursor.fetchall()
pos_end=c[0][0]
column_seq=c[0][1]
cursor.execute('select fields_terminated_by from dm_if_filemask where table_id=:tbl_id and type_id=:ty_id',tbl_id=table_id,ty_id=vf_id)
d=cursor.fetchall()
v_termin=d[0][0]
v_termin=v_termin.replace('\'',"")
v_termin=v_termin.replace("\"",'')
#print "@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@"
#print vf_id
#print f_termin,pos_end,column_seq
#print v_termin
if f_num==0:
pass
else:
w=0
i_num=0
v_num=0
i_file=[]
v_file=[]
while w < f_num:
if source_file[w][2].startswith('I'):
i_file.append(source_file[w])
i_num+=1
else:
v_file.append(source_file[w])
v_num+=1
w+=1
z=0
meg=0
m=900
h=200
j=100
f_llth=[]
v_llth=[]
mgg=[]
while z < f_num:
s_name =os.path.join(source_file[z][4],source_file[z][0])
cursor.callproc("common.Log_file_initial",[process_id,source_file[z][1],source_file[z][2] ,tp_id,source_file[z][3]])
cursor.callproc("common.Log_file_start",[process_id,source_file[z][1],source_file[z][2],tp_id,source_file[z][3],source_file[z][5],source_file[z][6],source_file[z][0],source_file[z][4]])
try:
ff=open(s_name,'r')
except Exception as e:
print (-1,'%s open error %s'%(s_name,e))
line=ff.readline()
line=line.strip('\n')
if source_file[z] in i_file:
if pos_end > 1:
#print "?????????????????????????????????"
#print len(line)
#print pos_end
#print line
if len(line)==pos_end:
m+=1
else:
meg='%s number of field not consistent' %source_file[z][2]
mgg.append(meg)
else:
f_ll=line.split(f_termin)
fl_num=len(f_ll)
f_llth.append(fl_num)
if max(f_llth)<>min(f_llth):
print'9999999999999999999'
cursor.callproc("common.Log_file_fail",[process_id,source_file[z][1],source_file[z][2],tp_id,source_file[z][3],meg,0,0])
meg='%s number of field not consistent' %source_file[z][2]
mgg.append(meg)
else:
h+=1
#print h
ff.close()
if source_file[z] in v_file:
v_ll=line.split(v_termin)
vl_num=len(v_ll)
v_llth.append(vl_num)
if max(v_llth)<>min(v_llth):
cursor.callproc("common.Log_file_fail",[process_id,source_file[z][1],source_file[z][2],tp_id,source_file[z][3],meg,0,0])
meg='The number of field not consistent'
print (-1,meg)
else:
j+=1
#print j
ff.close()
z+=1
if len(mgg)>1:
print (-1,mgg)
if meg==0:
#print f_num,i_file,v_file,source_file,tp_id,targetdir,process_id,table_id,v_termin
return f_num,i_file,v_file,source_file,tp_id,targetdir,process_id,table_id,v_termin
else:
return 0,0,0,0,0,0,0,0
db.close()
except Exception as e:
return (-1,e)
#merge files
def fun_merge(self):
try:
db=self.conn_db()
cursor=db.cursor()
readsize = 1024
k=0
z=0
f_num,i_file,v_file,source_file,tp_id,targetdir,process_id,table_id,v_termin=self.fun_colnum()
#print "$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$"
#print f_num,i_file,v_file,source_file,tp_id,targetdir,process_id,table_id
if f_num==0:
pass
else:
if len(i_file)==0:
pass
else:
newname1 = 'merge'+i_file[0][0]
targetname1=os.path.join(targetdir,newname1)
t_file1=open(targetname1,'wb')
while z< len(i_file):
if i_file[z] in i_file:
f_name =os.path.join(i_file[z][4],i_file[z][0])
f_file=open(f_name,'rb')
while 1:
filebytes = f_file.read(readsize)
if not filebytes:
break
else:
t_file1.write(filebytes)
t_file1.write(os.linesep)
cursor.callproc("common.Log_file_success",[process_id,i_file[z][1],i_file[z][2],tp_id,i_file[z][3],0])
f_file.close()
z+=1
t_file1.close
if len(v_file)==0:
pass
else:
newname2 = 'merge'+v_file[0][0]
targetname2=os.path.join(targetdir,newname2)
t_file2=open(targetname2,'wb')
a=0
b=0
f_min=0
lst_f=[]
file_line=[]
cursor.execute('select column_seq from DM_IF_VERIFY_FILE where table_id=:tbl_id and if_filename=1',tbl_id=table_id)
c=cursor.fetchall()
column_seq=c[0][0]
while k < len(v_file):
v_name =os.path.join(v_file[k][4],v_file[k][0])
v_f=open(v_name,'rb')
line=v_f.readline()
line=line.strip('\n')
ll=line.split(v_termin)
file_line.append(ll)
cursor.callproc("common.Log_file_success",[process_id,v_file[k][1],v_file[k][2],tp_id,v_file[k][3],0])
v_f.close()
k+=1
#print '9999999999999999999999999999999999999999999'
#print file_line
while a<len(ll):
b=0
f_min=0
while b<len(v_file):
if a==column_seq-1:
f_min=newname2
else:
file_line[b][a]=int(file_line[b][a])
f_min=f_min+file_line[b][a]
b=b+1
lst_f.append(f_min)
a=a+1
#print "!!!!!!!!!!!!!!!!!!!!!!!!!!!!"
#print lst_f
#print "$$$$$$$$$$$$$$$$$$$$$$$$$$"
q=0
while q<len(lst_f):
lst_f[q]=str(lst_f[q])
q+=1
#print lst_f
fstring="|".join(lst_f)
#print fstring
t_file2.write(fstring)
t_file2.close()
#print 'dddddddddddddddddddddddddddd'
#print k
#print z
if k+z==f_num:
print (0,f_num)
db.close()
except Exception as e:
return (-1,e)
if __name__ == '__main__':
a=kk()
a.fun_merge()