
Scripts from qrj: a daily pipeline that distcp-copies the latest Hive table partitions from the source cluster (kunpeng1) to 172.25.203.76, then registers them in the target metastore with MSCK REPAIR TABLE.

[root@kunpeng1 update-hive]# cat schedule.sh
#!/bin/bash

echo "=========start 1 level hive table distcp==========="
sh /root/qrj/to-leap/update-hive/hive-cp-update-part1.sh $1

echo "=========start 3 level hive table distcp==========="
sh /root/qrj/to-leap/update-hive/hive-cp-update-part3.sh $1

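# $? is the exit status of the part3 script; if it succeeded, trigger the
# metastore load on the target cluster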
if [ $? -eq 0 ];then
         ssh 172.25.203.76 "sh /root/qrj/load-hive-part.sh"
fi
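
How schedule.sh is triggered is not shown in the post; a plausible crontab entry (an assumption, not from the source) would run it daily and let startday default to today:

# hypothetical cron entry: run the pipeline at 06:00 every day
0 6 * * * sh /root/qrj/to-leap/update-hive/schedule.sh >> /root/qrj/to-leap/update-hive/logs/schedule.log 2>&1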
[root@kunpeng1 update-hive]# cat /root/qrj/to-leap/update-hive/hive-cp-update-part1.sh
#!/bin/bash

# define hdfs and hadoop command path
hdfs_cmd=/mnt/primary-hadoop-client/hadoop-2.6.0-cdh5.15.1/bin/hdfs
hadoop_cmd=/mnt/primary-hadoop-client/hadoop-2.6.0-cdh5.15.1/bin/hadoop
export JAVA_HOME=/opt/java1.8/jdk

# source and target HDFS paths; change them to match the real clusters.
# the target is addressed via webhdfs, so distcp can write to it across Hadoop versions
src_hdfspath="hdfs://yumcluster/apps/hive/warehouse"
target_hdfspath="webhdfs://172.25.203.76:50070/apps/hive/warehouse"

# the tables to copy
tbs=(
flat_tld_header
kfc_recsys_product_supply_daily
mcnt_itemcode_key_info
kfc_recsys_tradeup_stats_daily
kfc_recsys_orders_hit
kfc_recsys_tradeup_filter_results
kfc_recsys_tradeup_city_recall_daily
kfc_recsys_tradeup_user_recall_daily
ph_recsys_orders_hit
ph_recsys_tradeup_filter_results
)

# get the shell script dir path
scriptpath=$0
if [ "${scriptpath:0:1}" = "/" ];then
        curdir=`dirname ${scriptpath}`
else
        curdir="`pwd`"/"`dirname ${scriptpath}`"
fi
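
# note: an equivalent, more common idiom for the block above is
#   curdir=$(cd "$(dirname "$0")" && pwd)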

echo "the execute dir path is $curdir"

# path of logs dir
logsdir="${curdir}/logs"

# create the log dir
if [ ! -d ${logsdir}/distcp_logs ];then
        mkdir -p ${logsdir}/distcp_logs
fi

if [ ! -d ${logsdir}/cp_parti_log ];then
        mkdir -p ${logsdir}/cp_parti_log
fi

# define time
if [ -n "$1" ];then
        startday=$1
else
        startday=`date "+%Y%m%d"`
fi

echo "startday is $startday"

onedayago=`date -d "$startday -1 day" "+%Y%m%d"`
twodayago=`date -d "$startday -2 day" "+%Y%m%d"`
twodayago_format=`date -d "$startday -2 day" "+%Y-%m-%d"`
threedayago=`date -d "$startday -3 day" "+%Y%m%d"`


echo "there are ${#tbs[*]} tables waiting to copy"

for tb in "${tbs[@]}"
do
        # hadoop user
        case "$tb" in
                ph_recsys_orders_hit|ph_recsys_tradeup_filter_results)
                        hadoop_user=srv_kp_phdelivery
                        ;;
                *)
                        hadoop_user=srv_kp_recommend
                        ;;
        esac

        # kerberos auth: get a ticket for this table's service user from its keytab in the script dir
        kinit ${hadoop_user}@CN.YUMCHINA.COM -kt ${curdir}/${hadoop_user}.keytab

        # database path of the table
        case "$tb" in
                flat_tld_header)
                        dir1="dw/kfc"
                        ;;
                mcnt_itemcode_key_info)
                        dir1="ods/all"
                        ;;
                ph_recsys_orders_hit|ph_recsys_tradeup_filter_results)
                        dir1="aie_phdelivery.db"
                        ;;
                *)
                        dir1="aie_recommendation.db"
                        ;;
        esac

        # partition key
        case "$tb" in
                mcnt_itemcode_key_info)
                        partkey="p_date_key"
                        ;;
                kfc_recsys_orders_hit|kfc_recsys_tradeup_filter_results|ph_recsys_orders_hit|ph_recsys_tradeup_filter_results)
                        partkey="biz_date"
                        ;;
                *)
                        partkey="p_biz_date"
                        ;;
        esac

        # partition value
        case "$tb" in
                kfc_recsys_product_supply_daily|mcnt_itemcode_key_info)
                        partvalue=${onedayago}
                        ;;
                flat_tld_header)
                        partvalue=${twodayago}
                        ;;
                kfc_recsys_orders_hit|kfc_recsys_tradeup_filter_results|ph_recsys_orders_hit|ph_recsys_tradeup_filter_results)
                        partvalue=${twodayago_format}
                        ;;
                *)
                        partvalue=${threedayago}
                        ;;
        esac

        # check that the partition exists on the source HDFS
        ${hdfs_cmd} dfs -ls ${src_hdfspath}/${dir1}/${tb}/${partkey}=${partvalue} > /dev/null 2>&1

        if [ $? -eq 0 ];then
                echo "${partkey}=${partvalue} file of table $tb is exist"
                echo "start to copy partition ${partkey}=${partvalue} of table $tb"
                ${hadoop_cmd} distcp -D mapreduce.job.queuename=root.kp.recomend -D ipc.client.fallback-to-simple-auth-allowed=true -bandwidth 10 -m 10 ${src_hdfspath}/${dir1}/${tb}/${partkey}=${partvalue} ${target_hdfspath}/${dir1}/${tb}/ >> ${logsdir}/distcp_logs/distcp_`date +%F`.log 2>&1

                if [ $? -eq 0 ];then
                        echo "copy partition $tb ${partkey}=${partvalue} ok" >> ${logsdir}/cp_parti_log/${tb}_parti_`date +%F`.ok
                        echo "copy partition $tb ${partkey}=${partvalue} sucessed"
                else
                        echo "copy partition $tb ${partkey}=${partvalue} error" >> ${logsdir}/cp_parti_log/${tb}_parti_`date +%F`.err
                        echo "copy partition $tb ${partkey}=${partvalue} failed"
                        sh ${curdir}/message2wechat.sh -u 'oBLAUwMj09FxXDiljGGUzy7nyIJ8,oBLAUwKPZ-KqWSmGWPHEFORbkxjo,oBLAUwB4-72Y-3XwTyoKk4SAVlWo,oBLAUwNFATpsFx4rM1s9TTdTihuk'  -s "copy hive partition failed" -n "hive table copy" -t `date +%F_%T` -d "copy partition of hive table $tb ${partkey}=${partvalue} failed" -l "http://m.baidu.com"
                fi
        else
                echo "${partkey}=${partvalue} file of table $tb is not exist" >> ${logsdir}/cp_parti_log/${tb}_parti_`date +%F`.err
                sh ${curdir}/message2wechat.sh -u 'oBLAUwMj09FxXDiljGGUzy7nyIJ8,oBLAUwKPZ-KqWSmGWPHEFORbkxjo,oBLAUwB4-72Y-3XwTyoKk4SAVlWo,oBLAUwNFATpsFx4rM1s9TTdTihuk'  -s "copy hive partition failed" -n "hive table copy" -t `date +%F_%T` -d "partition of hive table $tb ${partkey}=${partvalue} is not exist" -l "http://m.baidu.com"
        fi
done
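
message2wechat.sh itself is not shown in the post; judging from its call sites it takes -u (comma-separated user ids), -s (subject), -n (task name), -t (timestamp), -d (detail) and -l (link). A hypothetical minimal sketch is below; the endpoint URL is a placeholder, not the real gateway:

#!/bin/bash
# hypothetical sketch of message2wechat.sh, reconstructed from its call sites;
# WECHAT_API is a placeholder, not the real notification endpoint
WECHAT_API="http://wechat-gateway.example.com/send"

while getopts "u:s:n:t:d:l:" opt; do
        case "$opt" in
                u) users=$OPTARG ;;    # comma-separated user ids
                s) subject=$OPTARG ;;  # message subject
                n) name=$OPTARG ;;     # task name
                t) msgtime=$OPTARG ;;  # timestamp
                d) detail=$OPTARG ;;   # detail text
                l) link=$OPTARG ;;     # link to attach
        esac
done

# send one message per user id
for user in ${users//,/ }; do
        curl -s -X POST "$WECHAT_API" \
                --data-urlencode "touser=$user" \
                --data-urlencode "subject=$subject" \
                --data-urlencode "name=$name" \
                --data-urlencode "time=$msgtime" \
                --data-urlencode "detail=$detail" \
                --data-urlencode "url=$link"
done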
[root@kunpeng1 update-hive]# cat /root/qrj/to-leap/update-hive/hive-cp-update-part3.sh
#!/bin/bash

# define hdfs and hadoop command path
hdfs_cmd=/mnt/primary-hadoop-client/hadoop-2.6.0-cdh5.15.1/bin/hdfs
hadoop_cmd=/mnt/primary-hadoop-client/hadoop-2.6.0-cdh5.15.1/bin/hadoop
export JAVA_HOME=/opt/java1.8/jdk

# source and target HDFS paths; change them to match the real clusters.
# the target is addressed via webhdfs, so distcp can write to it across Hadoop versions
src_hdfspath="hdfs://yumcluster/apps/hive/warehouse"
target_hdfspath="webhdfs://172.25.203.76:50070/apps/hive/warehouse"
hadoop_user=srv_kp_recommend

# the tables to copy
tbs=(
recsys_fusion_proxylog_tradeup_shoppingcart_parquet
recsys_fusion_proxylog_tradeup_shoppingcart_itemlinkids_parquet
recsys_fusion_order_parquet
recsys_fusion_order_orderitems_parquet
recsys_fusion_store_product_linkids_parquet
recsys_fusion_recommend_filter_list_parquet
recsys_fusion_recommend_filter_parquet
recsys_fusion_tradeup_product_parquet
)

# get the shell script dir path
scriptpath=$0
if [ "${scriptpath:0:1}" = "/" ];then
        curdir=`dirname ${scriptpath}`
else
        curdir="`pwd`"/"`dirname ${scriptpath}`"
fi

echo "the execute dir path is $curdir"

# path of logs dir
logsdir="${curdir}/logs"

# create the log dir
if [ ! -d ${logsdir}/distcp_logs ];then
        mkdir -p ${logsdir}/distcp_logs
fi

if [ ! -d ${logsdir}/cp_parti_log ];then
        mkdir -p ${logsdir}/cp_parti_log
fi


# kerberos auth
kinit ${hadoop_user}@CN.YUMCHINA.COM -kt ${curdir}/${hadoop_user}.keytab

# define time
if [ -n "$1" ];then
        startday=$1
else
        startday=`date "+%Y%m%d"`
fi

echo "startday is $startday"

twodayago=`date -d "$startday -2 day" "+%Y-%m-%d"`
year=${twodayago:0:4}
month=${twodayago:5:2}
day=${twodayago:8:2}


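# strip the leading zero so the partition path reads month=8/day=3 rather than
# month=08/day=03; bash arithmetic would do the same in one step:
#   mon=$((10#$month)); da=$((10#$day))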
if [ ${month:0:1} -eq 0 ];then
        mon=${month:1:1}
else
        mon=$month
fi


if [ ${day:0:1} -eq 0 ];then
        da=${day:1:1}
else
        da=$day
fi


echo "there are ${#tbs[*]} tables waiting to copy"

for tb in "${tbs[@]}"
do
        # database path of the table
        case "$tb" in
                flat_tld_header)
                        dir1="dw/kfc"
                        ;;
                mcnt_itemcode_key_info)
                        dir1="ods/all"
                        ;;
                *)
                        dir1="aie_recommendation.db"
                        ;;
        esac

        # make sure the year/month parent dir exists on the target before copying
        ssh 172.25.203.76 "su - hive -c 'hdfs dfs -ls hdfs://172.25.203.76:8020/apps/hive/warehouse/${dir1}/${tb}/year=${year}/month=${mon}'" > /dev/null 2>&1
        if [ $? -ne 0 ];then
                echo "create the month dir"
                ssh 172.25.203.76 "su - hive -c 'hdfs dfs -mkdir -p hdfs://172.25.203.76:8020/apps/hive/warehouse/${dir1}/${tb}/year=${year}/month=${mon}'"
        fi

        # check that the partition exists on the source HDFS
        ${hdfs_cmd} dfs -ls ${src_hdfspath}/${dir1}/${tb}/year=${year}/month=${mon}/day=${da} > /dev/null 2>&1

        if [ $? -eq 0 ];then
                echo "start to copy partition year=${year}/month=${mon}/day=${da} of table $tb"
                ${hadoop_cmd} distcp -D mapreduce.job.queuename=root.kp.recomend -D ipc.client.fallback-to-simple-auth-allowed=true -bandwidth 10 -m 10 ${src_hdfspath}/${dir1}/${tb}/year=${year}/month=${mon}/day=${da} ${target_hdfspath}/${dir1}/${tb}/year=${year}/month=${mon}/ >> ${logsdir}/distcp_logs/distcp_`date +%F`.log 2>&1

                if [ $? -eq 0 ];then
                        echo "copy partition year=${year}/month=${mon}/day=${da} of table $tb ok" >> ${logsdir}/cp_parti_log/${tb}_parti_`date +%F`.ok
                        echo "copy partition year=${year}/month=${mon}/day=${da} of table $tb sucessed"
                else
                        echo "copy partition year=${year}/month=${mon}/day=${da} of table $tb error" >> ${logsdir}/cp_parti_log/${tb}_parti_`date +%F`.err
                        echo "copy partition year=${year}/month=${mon}/day=${da} of table $tb failed"
                        sh ${curdir}/message2wechat.sh -u 'oBLAUwMj09FxXDiljGGUzy7nyIJ8,oBLAUwKPZ-KqWSmGWPHEFORbkxjo,oBLAUwB4-72Y-3XwTyoKk4SAVlWo,oBLAUwNFATpsFx4rM1s9TTdTihuk'  -s "copy hive partition failed" -n "hive table copy" -t `date +%F_%T` -d "copy partition of hive table $tb year=${year}/month=${mon}/day=${da} failed" -l "http://m.baidu.com"
                fi
        else
                echo "partition year=${year}/month=${mon}/day=${da} of table $tb is not exist" >> ${logsdir}/cp_parti_log/${tb}_parti_`date +%F`.err
                sh ${curdir}/message2wechat.sh -u 'oBLAUwMj09FxXDiljGGUzy7nyIJ8,oBLAUwKPZ-KqWSmGWPHEFORbkxjo,oBLAUwB4-72Y-3XwTyoKk4SAVlWo,oBLAUwNFATpsFx4rM1s9TTdTihuk'  -s "copy hive partition failed" -n "hive table copy" -t `date +%F_%T` -d "partition of hive table $tb year=${year}/month=${mon}/day=${da} is not exist" -l "http://m.baidu.com"
        fi

done

[root@mltraining01 qrj]# cat load-hive-part.sh
#!/bin/bash

tbs=(
flat_tld_header
recsys_fusion_proxylog_tradeup_shoppingcart_parquet
recsys_fusion_proxylog_tradeup_shoppingcart_itemlinkids_parquet
kfc_recsys_product_supply_daily
recsys_fusion_order_parquet
recsys_fusion_order_orderitems_parquet
mcnt_itemcode_key_info
kfc_recsys_tradeup_stats_daily
kfc_recsys_orders_hit
kfc_recsys_tradeup_filter_results
recsys_fusion_store_product_linkids_parquet
recsys_fusion_recommend_filter_list_parquet
recsys_fusion_recommend_filter_parquet
recsys_fusion_tradeup_product_parquet
kfc_recsys_tradeup_city_recall_daily
kfc_recsys_tradeup_user_recall_daily
ph_recsys_orders_hit
ph_recsys_tradeup_filter_results
)


for tb in "${tbs[@]}"
do
        # database of the table
        case "$tb" in
                flat_tld_header)
                        db="dw_kfc"
                        ;;
                mcnt_itemcode_key_info)
                        db="ods_all"
                        ;;
                ph_recsys_orders_hit|ph_recsys_tradeup_filter_results)
                        db="aie_phdelivery"
                        ;;
                *)
                        db="aie_recommendation"
                        ;;
        esac

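        # register the newly copied partition dirs in the Hive metastore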
        beeline -u "jdbc:hive2://172.25.203.76:10000/${db}" -n hive -e "MSCK REPAIR TABLE ${tb};" > /root/qrj/load_hive_log/${tb}_`date +%F`.log 2>&1
done
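
To check that MSCK actually registered the copied partitions, a quick SHOW PARTITIONS against the same HiveServer2 works; the table name here is only an example:

beeline -u "jdbc:hive2://172.25.203.76:10000/aie_recommendation" -n hive \
        -e "SHOW PARTITIONS kfc_recsys_tradeup_stats_daily;"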