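'''
The Oracle database exports timestamps in a malformed form, e.g. TO_TIMESTAMP('2017040110310003100000', 'YYYYMMDDHH24MISSFF6'): the digit string carries two extra trailing zeros.
The non-conforming values in the file are located with a regular expression, and the corrected data is written to a new file.
'''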
def FilePronew(src_path=r'D:/jie/filepro/1.sql', dst_path=r'D:/jie/filepro/111.sql'):
    """Copy an SQL dump, trimming over-long TO_TIMESTAMP digit strings.

    Every ``TO_TIMESTAMP('<digits>`` literal whose digit run is longer than
    20 characters (the width of ``YYYYMMDDHH24MISSFF6``) is cut back to its
    first 20 digits.  All other text is copied unchanged.

    Args:
        src_path: SQL file to read.
        dst_path: File to create (or overwrite) with the corrected text.
    """
    # Group 1 keeps the prefix plus the first 20 digits; the trailing
    # ``\d+`` swallows whatever surplus digits follow, however many.
    overlong = re.compile(r'(TO_TIMESTAMP\(\'*\d{20})\d+')
    # ``with`` guarantees both handles are closed even if a write fails.
    with open(src_path) as src, open(dst_path, 'w') as dst:
        for line in src:
            dst.write(overlong.sub(r'\1', line))
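'''
A single file holds records for several dates, and the records are not stored in date order, so the data has to be split by date.
Each line is matched with a regular expression, and the output file is written once per line.
'''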
def SplitFileByDate(src_path=r'D:/jie/data/gongxiang/TRANS_REFUND.sql', out_prefix='TRANS_REFUND_'):
    """Split an SQL dump into one file per date found in TO_TIMESTAMP values.

    Each input line is appended to ``<out_prefix><YYYYMMDD>.sql``, where the
    date comes from the first ``TO_TIMESTAMP(' YYYY-MM-DD`` on that line.
    A line without such a timestamp goes to the file of the most recently
    seen date; lines before the first timestamp go to ``<out_prefix>.sql``.
    Each newly detected date is printed.

    Args:
        src_path: SQL file to split.
        out_prefix: Path prefix of the output files; the default writes
            ``TRANS_REFUND_<date>.sql`` into the current working directory.
    """
    # The capture group isolates the date, replacing the positional slice
    # that had to count the characters of "TO_TIMESTAMP(' ".
    date_re = re.compile(r'TO_TIMESTAMP\(\' (\d{4}-\d{2}-\d{2})')
    temp = ''
    with open(src_path, 'r') as src:
        for line in src:
            found = date_re.search(line)
            if found:
                temp = found.group(1).replace('-', '')
                print(temp)
            # Records are not date-ordered, so the target file is chosen
            # (and appended to) line by line.
            with open(out_prefix + temp + '.sql', 'a') as out:
                out.write(line)
'''
Sort files into separate folders according to a file-name rule,
then compress each of the resulting folders.
'''
def RARFilebyname(targetDir='D:/etc0901/etc0901', archive_prefix='etc_'):
    """Group files into sub-folders by a file-name field, then zip each folder.

    Every regular file directly inside ``targetDir`` is copied into the
    sub-folder named after characters 11..16 of its file name (created on
    demand).  Afterwards every sub-directory of ``targetDir`` is packed into
    ``<archive_prefix><dirname>.zip``.  Despite the function name, the
    archives are zip files.  Each directory entry's path is printed during
    the archiving pass.

    Args:
        targetDir: Directory whose files are classified and archived.
        archive_prefix: Path prefix of the archives; the default writes
            ``etc_<dirname>.zip`` into the current working directory.
    """
    for name in os.listdir(targetDir):
        source = targetDir + '/' + name
        if not os.path.isfile(source):
            continue
        key = name[11:17]
        if not key:
            # Name too short to carry the field: the destination would be
            # the file itself and copyfile() would raise, so skip it.
            continue
        bucket = targetDir + '/' + key
        if not os.path.exists(bucket):
            os.makedirs(bucket)
        shutil.copyfile(source, bucket + '/' + name)

    # Second listing so that the folders created above are included.
    for entry in os.listdir(targetDir):
        path = targetDir + '/' + entry
        print(path)
        if os.path.isdir(path):
            shutil.make_archive(archive_prefix + entry, 'zip', path)
from struct import *
'''
Background on big-endian vs. little-endian byte order: http://blog.csdn.net/fan_hai_ping/article/details/8424360
http://blog.csdn.net/lis_12/article/details/52777983
'''
def ReadYKTFile(path=u'D:\\jie\\02_测试数据\\YKT\\20170901\\52013170901000000001J0'):
    """Print the leading bytes of a YKT binary file decoded two ways.

    The start of the file is unpacked once with a little-endian, unpadded
    layout and once with a native-order, native-alignment layout, and both
    tuples are printed.  Two sample integers are then printed after
    conversion with ``BCDtoDatetime``.

    Args:
        path: Binary file to read.

    Raises:
        struct.error: If the file is shorter than a layout requires.
    """
    # '<' selects little-endian with standard sizes and no padding.
    packed_fmt = '<bbbhhhiibbqqqqqqqqqq'
    # '@' selects native byte order, sizes and alignment (padding included).
    native_fmt = '@bbbhhhiibbqqqqqq'
    # NOTE(review): the original also defined an unused format
    # '<bqqhhhiibhiq' named A3; its purpose is not visible here.

    # calcsize() yields the exact byte count for each layout, replacing the
    # hand-summed field widths and the platform-specific literal 72.
    with open(path, 'rb') as fh:
        packed_raw = fh.read(calcsize(packed_fmt))
        fh.seek(0)
        native_raw = fh.read(calcsize(native_fmt))

    print(unpack(packed_fmt, packed_raw))
    print(unpack(native_fmt, native_raw))
    print(BCDtoDatetime(17372960, 4))  # 20170901, a date
    print(BCDtoDatetime(320869749, 4))
'''
BCD encoding uses 4 bits to represent one decimal digit.
Parameters: x is the number to convert, y is its size in bytes.
'''
def BCDtoDatetime(x,y):
    """Render the ``y``-byte integer ``x`` as a digit string, lowest byte first.

    ``x`` is split into ``2 * y`` four-bit groups, most significant first.
    Each group is written as its decimal value, the groups are paired up
    byte by byte, and the byte pairs are joined in reverse order.
    For example, 0x01091720 with ``y=4`` gives ``'20170901'``.

    Args:
        x: The number to convert.
        y: The number of bytes ``x`` occupies.

    Returns:
        The resulting string; empty when ``y`` is 0.
    """
    nibble_count = y * 2
    digits = []
    remainder = x
    # Peel four-bit groups off the most significant end, one per step.
    for position in range(nibble_count):
        digit, remainder = divmod(remainder, 16 ** (nibble_count - position - 1))
        digits.append(str(digit))
    # Two groups make one byte; bytes are emitted last-to-first.
    byte_texts = [digits[k] + digits[k + 1] for k in range(0, nibble_count, 2)]
    return ''.join(reversed(byte_texts))