历史等权算法
编程 规范 ,提高效率 ; 程序的用户使用高效性、健壮性、维护友好性 命令行传参 命令行输入 引入控制台参数 任务的总量 执行的进度 预估执行时间
# coding:utf-8
from tools import *
def del_data_before_write(singleDayDate):
'''
每次执行,需要一个完整的数据源;重置目标表;
:param singleDayDate:
:return:
'''
sql_without_limit = 'DELETE FROM sta_addr WHERE date="{}"'.format(singleDayDate)
for i in range(10000):
print(i)
if i % 10 == 0:
sql = 'SELECT id FROM sta_addr WHERE date="{}" LIMIT 1'.format(singleDayDate)
r = mysql_fetch(sql)
if len(r) == 0:
break
sql = sql_without_limit + ' LIMIT 30000;'
try:
print(sql)
mysql_write(sql)
except Exception as e:
print(e)
def write_(singleDayTable, singleDayDate):
del_data_before_write(singleDayDate)
sql_loop = 'SELECT id,uid,address,modify_time,pv,ip FROM {} LIMIT 10000'.format(singleDayTable)
sql_head = 'INSERT INTO sta_addr (uid,address,date,modify_time,pv,ip) VALUES ';
for _ in range(3000):
try:
r = mysql_fetch(sql_loop)
if r == False:
continue
if len(r) == 0:
break
del_id_l, batch_insert_l = [], []
for i in r:
id_, uid, address, modify_time, pv, ip = i
del_id_l.append(id_)
ll = [uid,
address,
singleDayDate,
modify_time,
pv,
ip]
batch_insert_l.append(' ( "{}" ) '.format('","'.join([str(i) for i in ll])))
batch_insert_sql = '{}{}'.format(sql_head, ','.join(batch_insert_l))
try:
print(_)
mysql_write(batch_insert_sql)
except Exception as e:
print(e)
print(batch_insert_sql)
batch_del_sql = 'DELETE FROM {} WHERE id IN ({});'.format(singleDayTable,
','.join([str(i) for i in del_id_l]))
try:
mysql_write(batch_del_sql)
except Exception as e:
print(e)
print(batch_del_sql)
except Exception as e:
print(e)
if __name__ == '__main__':
singleDayTable = input("请输入数据源表:\n")
singleDayDate = input("请输入处理日期,例如:2018-10-17:\n")
print('您的输入结果:', singleDayTable, singleDayDate, '默认判断输入合法,下面开始执行')
write_(singleDayTable, singleDayDate)
# coding:utf-8
from tools import *
import math
dateFrom = input("请输入处理日期闭区间的左端点,例如:2018-10-17:\n")
dateEnd = input("请输入处理日期闭区间的右端点,例如:2018-10-18:\n")
mktimeFrom, mktimeEnd = date2mktime(dateFrom), date2mktime(dateEnd)
def get_target_date():
timestamp_init = int(time.mktime(time.strptime('2018-09-18', '%Y-%m-%d'))) # 只有从18号开始的日志
d = []
ii = timestamp_init - 60 * 60 * 24
today = time.strftime('%Y-%m-%d', time.localtime(int(time.time())))
for _ in range(60):
ii += 60 * 60 * 24
day = time.strftime('%Y-%m-%d', time.localtime(ii))
if ii == int(time.mktime(time.strptime(today, '%Y-%m-%d'))):
break
d.append(day)
d = sorted(d, reverse=True)
return d
def progress_bar(info, raw_data_num, db_operation_counter, start_time, this_time,
estimatedRemainderMinutesAlgorithm='Historical equal weight algorithm',
date_format='%Y-%m-%d %H:%M:%S'):
'''
:param info: 业务信息
:param raw_data_num: 总局数据总条数
:param db_operation_counter: 进度计数
:param start_time: 开始时间
:param this_time: 统计时间
:param estimatedRemainderMinutesAlgorithm: 剩余时间估计采用历史等权算法
:return:
'''
console_log = {}
console_log['info'], console_log['raw_data_num'], console_log['db_operation_counter'], console_log['start_time'], \
console_log[
'this_time'] = info, raw_data_num, db_operation_counter, start_time, this_time
if db_operation_counter == 0:
estimatedRemainderMinutes = -1
percent = 0
elif raw_data_num == db_operation_counter:
estimatedRemainderMinutes = 0
percent = 1
else:
percent = db_operation_counter / (raw_data_num - db_operation_counter)
if estimatedRemainderMinutesAlgorithm == 'Historical equal weight algorithm':
estimatedRemainderMinutes = (1 / percent - 1) / db_operation_counter * (
date2mktime(this_time, date_format) - date2mktime(start_time, date_format)) / 60
console_log['percent'], console_log['estimatedRemainderMinutes'] = percent, estimatedRemainderMinutes
print(console_log)
sql_head = 'INSERT INTO sta_addr (uid,address,date,modify_time,pv,ip) VALUES ';
target_date_l = get_target_date()
for target_date in target_date_l:
chk_mktime = date2mktime(target_date)
if chk_mktime < mktimeFrom or chk_mktime > mktimeEnd:
continue
mR = {}
for f in file_list:
print(f)
if target_date not in f:
continue
mR_ = mapReduceFile(f)
mR = mergeDic(mR, mR_)
address_l, insert_l = [], []
info, raw_data_num, db_operation_counter, start_time, this_time = target_date, len(mR), 0, getNow(), 0,
progress_bar(info, raw_data_num, db_operation_counter, start_time, this_time)
db_operation_step = 1000
db_raw_data_num = len(mR)
db_operation_times = math.ceil(db_raw_data_num / db_operation_step)
for k in mR:
try:
uid, address, dt = k.split(',')
counter, request_time, ip = mR[k]['counter'], mR[k]['request_time'], mR[k]['ip']
dt = time.strftime('%Y-%m-%d', time.localtime(request_time))
ll = [uid,
address,
dt,
request_time,
counter,
ip]
address_l.append('"{}"'.format(address))
insert_l.append(' ( "{}" ) '.format('","'.join([str(i) for i in ll])))
if len(insert_l) == db_operation_step:
sql = 'DELETE FROM sta_addr WHERE date="{}" AND address IN ({})'.format(target_date,
','.join(address_l))
try:
print('del')
mysql_write(sql)
except Exception as e:
print(e)
print(sql)
sql = '{}{}'.format(sql_head, ','.join(insert_l))
try:
print('insert')
mysql_write(sql)
except Exception as e:
print(e)
print(sql)
address_l, insert_l = [], []
db_operation_counter += db_operation_step
info, raw_data_num, db_operation_counter, start_time, this_time = info, raw_data_num, db_operation_counter, start_time, getNow()
progress_bar(info, raw_data_num, db_operation_counter, start_time, this_time)
except Exception as e:
print(k)
print(e)
try:
if db_operation_times * db_operation_step > db_raw_data_num:
sql = 'DELETE FROM sta_addr WHERE date="{}" AND address IN ({})'.format(target_date,
','.join(address_l))
try:
print('del')
mysql_write(sql)
except Exception as e:
print(e)
print(sql)
sql = '{}{}'.format(sql_head, ','.join(insert_l))
try:
print('insert')
mysql_write(sql)
except Exception as e:
print(e)
print(sql)
db_operation_counter += len(insert_l)
info, raw_data_num, db_operation_counter, start_time, this_time = info, raw_data_num, db_operation_counter, start_time, getNow()
progress_bar(info, raw_data_num, db_operation_counter, start_time, this_time)
del address_l, insert_l
except Exception as e:
print(k)
print(e)
{'this_time': '2018-10-23 10:28:57', 'raw_data_num': 623933, 'db_operation_counter': 94000, 'start_time': '2018-10-23 10:28:20', 'estimatedRemainderMinutes': 3.0423873924852874e-05, 'info': '2018-10-22', 'percent': 0.17738091419103924}
del
write-ok
insert
write-ok
{'this_time': '2018-10-23 10:28:58', 'raw_data_num': 623933, 'db_operation_counter': 95000, 'start_time': '2018-10-23 10:28:20', 'estimatedRemainderMinutes': 3.045143859649123e-05, 'info': '2018-10-22', 'percent': 0.17960686892290706}
del
write-ok
insert
write-ok
{'this_time': '2018-10-23 10:28:58', 'raw_data_num': 623933, 'db_operation_counter': 96000, 'start_time': '2018-10-23 10:28:20', 'estimatedRemainderMinutes': 2.9682895688657406e-05, 'info': '2018-10-22', 'percent': 0.18184125637154716}
del
write-ok
insert
write-ok
{'this_time': '2018-10-23 10:28:58', 'raw_data_num': 623933, 'db_operation_counter': 97000, 'start_time': '2018-10-23 10:28:20', 'estimatedRemainderMinutes': 2.89394090764162e-05, 'info': '2018-10-22', 'percent': 0.18408412454714357}
del
write-ok
insert
write-ok
{'this_time': '2018-10-23 10:28:59', 'raw_data_num': 623933, 'db_operation_counter': 98000, 'start_time': '2018-10-23 10:28:20', 'estimatedRemainderMinutes': 2.8962562473969184e-05, 'info': '2018-10-22', 'percent': 0.18633552182502333}
del
write-ok
insert
write-ok
{'this_time': '2018-10-23 10:28:59', 'raw_data_num': 623933, 'db_operation_counter': 99000, 'start_time': '2018-10-23 10:28:20', 'estimatedRemainderMinutes': 2.8247775737169676e-05, 'info': '2018-10-22', 'percent': 0.18859549694913447}
del
write-ok
insert
write-ok
{'this_time': '2018-10-23 10:29:00', 'raw_data_num': 623933, 'db_operation_counter': 100000, 'start_time': '2018-10-23 10:28:20', 'estimatedRemainderMinutes': 2.82622e-05, 'info': '2018-10-22', 'percent': 0.1908640990355637}
del
write-ok
insert
write-ok
{'this_time': '2018-10-23 10:29:00', 'raw_data_num': 623933, 'db_operation_counter': 101000, 'start_time': '2018-10-23 10:28:20', 'estimatedRemainderMinutes': 2.7574616867627353e-05, 'info': '2018-10-22', 'percent': 0.19314137757609484}
del
write-ok
insert
write-ok
{'this_time': '2018-10-23 10:29:00', 'raw_data_num': 623933, 'db_operation_counter': 102000, 'start_time': '2018-10-23 10:28:20', 'estimatedRemainderMinutes': 2.6908432654107393e-05, 'info': '2018-10-22', 'percent': 0.19542738244180766}
del
write-ok
insert
write-ok
{'this_time': '2018-10-23 10:29:01', 'raw_data_num': 623933, 'db_operation_counter': 103000, 'start_time': '2018-10-23 10:28:20', 'estimatedRemainderMinutes': 2.6919365632953152e-05, 'info': '2018-10-22', 'percent': 0.19772216388671865}
del
write-ok
insert
write-ok

浙公网安备 33010602011771号