mail.sh

# Send ./all.html as the body of an HTML e-mail through sendmail.
# Meant to be run as `sh mail.sh` from the directory that contains all.html,
# so only POSIX sh features are used.
set -eu

report=./all.html

# Fail loudly instead of mailing an empty report.
if [ ! -r "$report" ]; then
  printf 'mail.sh: cannot read %s\n' "$report" >&2
  exit 1
fi

# sendmail -t takes the recipients from the message headers.  The header
# block must therefore start on the very first line and contain no empty
# lines: the first empty line ends the headers and starts the body.
# The report is streamed with cat rather than expanded from a variable so
# its content reaches sendmail unchanged.
{
  cat <<'EOF'
SUBJECT: feature
TO: aaa@aaa.com
MIME-VERSION: 1.0
Content-type: text/html

<html>
<body>
EOF
  cat "$report"
  cat <<'EOF'
</body>
</html>
EOF
} | /usr/sbin/sendmail -t -F SenderDisplayerName

 

f.py

#-*- coding: utf-8 -*-

import os

import datetime

import time

import pandas as pd

# Show every column when a DataFrame is printed.
pd.set_option('display.max_columns', None)


def previous_hour_partition(now=None):
    """Return the ``(ds, hour)`` pair for the hour preceding ``now``.

    Both values are derived from one timestamp so they always agree, and the
    subtraction is done on the full datetime so that 00:xx maps to hour 23 of
    the previous day rather than to hour -1 of the current day.

    Args:
        now: a ``datetime.datetime``; defaults to the current local time.

    Returns:
        A tuple ``(ds, hour)`` where ``ds`` is a ``YYYY-MM-DD`` string and
        ``hour`` is an int in the range 0..23.
    """
    if now is None:
        now = datetime.datetime.now()
    target = now - datetime.timedelta(hours=1)
    return target.strftime("%Y-%m-%d"), target.hour


# Partition that the report covers: the previous full hour.
ds, hour = previous_hour_partition()

# Query template; filled in with str.format(ds=..., hour=..., modelname=...).
sql_str = "select source, features from db.table where ds = '{ds}' and hour='{hour}' and modelname='{modelname}'"

def f(ds, hour, moadelname):
    """Run the Hive query for one model and save its output to a local file.

    Fills the module-level ``sql_str`` template with the arguments, runs it
    with ``hive -e`` and writes hive's standard output to a file named after
    the model in the current working directory.

    Args:
        ds: date partition value substituted into the query.
        hour: hour partition value substituted into the query.
        moadelname: model name; used as the ``modelname`` filter in the query
            and as the name of the output file.  The parameter keeps its
            original spelling so keyword callers are not broken.

    Returns:
        The exit status of the ``hive`` process (0 means success).

    Raises:
        OSError: if the ``hive`` executable cannot be started.
    """
    # Imported here because the file-level imports do not include subprocess.
    import subprocess

    # Use the argument itself; the body previously read a global `modelname`.
    modelname = moadelname
    hql_str = sql_str.format(ds=ds, hour=hour, modelname=modelname)

    # Passing an argv list and redirecting through a file object avoids
    # building a shell command line, so quotes, `$` or spaces in the query or
    # in the model name cannot change what gets executed.
    with open(modelname, 'w') as out:
        return subprocess.call(['hive', '-e', hql_str], stdout=out)

def d(moadelname, html):
    """Append per-source summary tables for one model to an HTML string.

    Reads the file named after the model, expands each ``features`` value
    into columns, and for ``source`` values 0 and 1 computes
    ``DataFrame.describe`` over the feature columns.  Each table is written
    to a file named ``<model>_source_<i>`` and also appended to ``html``
    together with a title line built from the module-level ``ds`` and
    ``hour``.

    Args:
        moadelname: model name; also the name of the input file.  The
            parameter keeps its original spelling so keyword callers are not
            broken.
        html: HTML accumulated so far.

    Returns:
        ``html`` extended with a title and a describe table per source,
        joined with newlines.
    """
    # Use the argument itself; the body previously read a global `modelname`.
    modelname = moadelname

    # NOTE(review): read_table infers a header row by default, so if the dump
    # has no header line its first record becomes the column names and is
    # dropped by the rename below -- confirm against the hive output format.
    data = pd.read_table(modelname)
    data.columns = ['source', 'features']

    # SECURITY: eval() executes whatever text is stored in the features
    # column.  Kept because the exact serialisation of that column is not
    # visible here; if the values are plain literals, ast.literal_eval would
    # be the safe replacement.
    feature_rows = [eval(raw) for raw in data['features'].values]
    feature_df = pd.DataFrame(feature_rows)

    # Put the expanded feature columns next to `source`, row by row.
    df = pd.merge(feature_df, data, left_index=True, right_index=True)
    df = df.drop(['features'], axis=1)

    percentiles = [0.01, 0.05, 0.10, 0.25, 0.50, 0.75, 0.90, 0.99]
    for i in [0, 1]:
        # Non in-place drop: the filtered frame is a slice of df, and
        # modifying a slice in place triggers SettingWithCopy warnings.
        df_source = df[df['source'] == i].drop(['source'], axis=1)

        name = modelname + '_source_' + str(i)
        df_source.describe(percentiles=percentiles).to_html(name)

        title = '<div style="float:left;">'+ds+' '+str(hour)+', '+name+'</div>'

        # Read the table back from the file that to_html just wrote.
        with open(name, 'r') as table_file:
            describe = table_file.read()

        html = '\n'.join([html, title, describe])

    return html

if __name__ == '__main__':
    # Imported here because the file-level imports do not include sys.
    import sys

    started = time.time()

    modelname_list = ['modelname']

    # Build one HTML document holding the tables of every model.
    html = ''
    for modelname in modelname_list:
        # The hive export is disabled, so d() expects the file named after
        # the model to exist already.
        #f(ds, hour, modelname)
        html = d(modelname, html)

    # mail.sh reads this file from the current directory.  The handle is not
    # called `f` so the module-level function f() is not overwritten.
    with open('all.html', 'w') as out:
        out.write(html)

    status = os.system('sh mail.sh')
    if status != 0:
        # Report a failed send instead of dropping the result silently.
        sys.stderr.write('sh mail.sh failed with status %s\n' % status)

    # Elapsed wall-clock time in minutes.
    print((time.time() - started) / 60)