mail.sh
#!/bin/sh
# Send the HTML fragment stored in ./all.html as an HTML e-mail.
#
# sendmail is invoked with -t, so the recipient is taken from the TO: header
# of the message itself, and with -F to set the sender's display name.

report_file=./all.html

# Without this check a missing report would still send an empty message.
if [ ! -r "$report_file" ]; then
    echo "mail.sh: cannot read $report_file" >&2
    exit 1
fi

s=$(cat "$report_file")

# The empty line after the last header is required: it is what separates the
# header section from the message body.
/usr/sbin/sendmail -t -F SenderDisplayerName <<EOF
SUBJECT: feature
TO: aaa@aaa.com
MIME-VERSION: 1.0
Content-type: text/html

<html>
<body>
${s}
</body>
</html>
EOF
f.py
#-*- coding: utf-8 -*-
import os
import datetime
import time
import pandas as pd
# Do not truncate the column list when pandas renders a DataFrame.
pd.set_option('display.max_columns', None)

# The report covers the previous clock hour.  Both the date and the hour are
# taken from a single timestamp shifted back by one hour, so a run between
# 00:00 and 00:59 targets hour 23 of the previous day instead of producing
# hour -1 of the current day.
_report_time = datetime.datetime.now() - datetime.timedelta(hours=1)
ds = _report_time.strftime("%Y-%m-%d")
hour = _report_time.hour

# Query template; {ds}, {hour} and {modelname} are str.format placeholders.
# NOTE(review): hour is an int, so it is rendered without zero padding
# ('9', not '09') -- confirm this matches the values stored in the table.
sql_str = "select source, features from db.table where ds = '{ds}' and hour='{hour}' and modelname='{modelname}'"
def f(ds, hour, moadelname):
    """Run the Hive query for one model and save its output to a file.

    The module-level ``sql_str`` template is filled in with the arguments,
    executed with ``hive -e``, and the command's standard output is written
    to a file named after the model in the current working directory.

    Args:
        ds: Value substituted for ``{ds}`` in the query.
        hour: Value substituted for ``{hour}`` in the query.
        moadelname: Model name; substituted for ``{modelname}`` in the query
            and used as the output file name.  The misspelt parameter name
            is kept so that existing callers are unaffected.

    Raises:
        subprocess.CalledProcessError: If hive exits with a non-zero status.
        OSError: If hive cannot be started or the output file cannot be
            opened.
    """
    import subprocess

    # Use the argument itself; the body previously read a global called
    # ``modelname`` and silently ignored what the caller passed in.
    hql_str = sql_str.format(ds=ds, hour=hour, modelname=moadelname)

    # An argument list plus an explicit stdout file replaces the shell
    # string, so quotes or spaces in the query or file name cannot break or
    # alter the command line.  check_call surfaces a failed query here
    # instead of leaving a partial file for the next step to trip over.
    with open(moadelname, 'w') as out:
        subprocess.check_call(['hive', '-e', hql_str], stdout=out)
def d(moadelname, html):
    """Append per-source summary statistics for one model to an HTML string.

    Reads the tab-separated file named ``moadelname`` (two columns, labelled
    ``source`` and ``features``), turns the evaluated ``features`` values
    into a DataFrame, and for source 0 and source 1 separately renders
    ``DataFrame.describe`` as an HTML table.  Each table is also written to
    a file called ``<moadelname>_source_<i>`` in the current directory.

    Args:
        moadelname: Name of the input file; also the prefix of the output
            file names and of the titles.  The misspelt parameter name is
            kept so that existing callers are unaffected.
        html: HTML accumulated so far.

    Returns:
        ``html`` followed, for each source, by a title ``<div>`` and the
        statistics table, all joined with newlines.
    """
    # NOTE(review): read_table treats the first line of the file as a header
    # row, which is then replaced below -- confirm the file really starts
    # with a header line, otherwise the first record is lost.
    data = pd.read_table(moadelname)
    data.columns = ['source', 'features']

    # SECURITY: eval() executes whatever expression is stored in the
    # features column.  If the values are always plain literals (presumably
    # dict literals -- TODO confirm), ast.literal_eval is the safe choice.
    # A list is built because older pandas versions do not accept a lazy
    # map object as DataFrame input.
    feature_rows = [eval(raw) for raw in data['features'].values]
    feature_df = pd.DataFrame(feature_rows)
    df = pd.merge(feature_df, data, left_index=True, right_index=True)
    df = df.drop(['features'], axis=1)

    for i in [0, 1]:
        # Filtering and drop() each return a new frame, so nothing is
        # modified in place on a slice (no SettingWithCopyWarning).
        df_source = df[df['source'] == i].drop(['source'], axis=1)
        name = moadelname + '_source_' + str(i)
        df_source.describe(percentiles=[0.01, 0.05, 0.10, 0.25, 0.50, 0.75, 0.90, 0.99]).to_html(name)
        # ds and hour are the module-level report date and hour.
        title = '<div style="float:left;">' + ds + ' ' + str(hour) + ', ' + name + '</div>'
        # Read the table back from the file that to_html just wrote.
        with open(name, 'r') as table_file:
            describe = table_file.read()
        html = '\n'.join([html, title, describe])
    return html
if __name__ == '__main__':
    # Build the report for every model, write it to all.html, mail it, and
    # print how long the whole run took.
    started = time.time()
    modelname_list = ['modelname']
    html = ''
    for modelname in modelname_list:
        # The Hive export step is disabled here, so d() reads a file named
        # after the model that must already exist.
        #f(ds, hour, modelname)
        html = d(modelname, html)
    # The handle is deliberately not called ``f``: binding that name at
    # module level would replace the function f() defined above.
    with open('all.html', 'w') as report:
        report.write(html)
    # mail.sh reads ./all.html and sends it by e-mail.
    shell_str = 'sh mail.sh'
    os.system(shell_str)
    # Elapsed wall-clock time in minutes.
    print((time.time() - started) / 60)