Scripts by qrj
[root@kunpeng1 update-hive]# cat schedule.sh
#!/bin/bash
echo "=========start 1 level hive table distcp==========="
sh /root/qrj/to-leap/update-hive/hive-cp-update-part1.sh $1
echo "=========start 3 level hive table distcp==========="
sh /root/qrj/to-leap/update-hive/hive-cp-update-part3.sh $1
# note: $? only reflects the part3 script, so the load step is skipped only when part3 fails
if [ $? -eq 0 ];then
    ssh 172.25.203.76 "sh /root/qrj/load-hive-part.sh"
fi

[root@kunpeng1 update-hive]# cat /root/qrj/to-leap/update-hive/hive-cp-update-part1.sh
#!/bin/bash
# define the hdfs and hadoop command paths
hdfs_cmd=/mnt/primary-hadoop-client/hadoop-2.6.0-cdh5.15.1/bin/hdfs
hadoop_cmd=/mnt/primary-hadoop-client/hadoop-2.6.0-cdh5.15.1/bin/hadoop
export JAVA_HOME=/opt/java1.8/jdk

# source and target hdfs paths; change them to match the real cluster layout
src_hdfspath="hdfs://yumcluster/apps/hive/warehouse"
target_hdfspath="webhdfs://172.25.203.76:50070/apps/hive/warehouse"

# tables to copy
tbs=(
flat_tld_header
kfc_recsys_product_supply_daily
mcnt_itemcode_key_info
kfc_recsys_tradeup_stats_daily
kfc_recsys_orders_hit
kfc_recsys_tradeup_filter_results
kfc_recsys_tradeup_city_recall_daily
kfc_recsys_tradeup_user_recall_daily
ph_recsys_orders_hit
ph_recsys_tradeup_filter_results
)

# resolve the directory this script lives in
scriptpath=$0
if [ "${scriptpath:0:1}" = "/" ];then
    curdir=`dirname ${scriptpath}`
else
    curdir="`pwd`"/"`dirname ${scriptpath}`"
fi
echo "the execute dir path is $curdir"

# create the log dirs
logsdir="${curdir}/logs"
if [ ! -d ${logsdir}/distcp_logs ];then
    mkdir -p ${logsdir}/distcp_logs
fi
if [ ! -d ${logsdir}/cp_parti_log ];then
    mkdir -p ${logsdir}/cp_parti_log
fi

# reference date: first argument if given, otherwise today
if [ -n "$1" ];then
    startday=$1
else
    startday=`date "+%Y%m%d"`
fi
echo "startday is $startday"
onedayago=`date -d "$startday -1 day" "+%Y%m%d"`
twodayago=`date -d "$startday -2 day" "+%Y%m%d"`
twodayago_format=`date -d "$startday -2 day" "+%Y-%m-%d"`
threedayago=`date -d "$startday -3 day" "+%Y%m%d"`

echo "there are ${#tbs[*]} tables waiting to copy"
for tb in ${tbs[*]}
do
    # hadoop user
    case "$tb" in
        ph_recsys_orders_hit|ph_recsys_tradeup_filter_results)
            hadoop_user=srv_kp_phdelivery
            ;;
        *)
            hadoop_user=srv_kp_recommend
            ;;
    esac
    # kerberos auth
    kinit ${hadoop_user}@CN.YUMCHINA.COM -kt ${curdir}/${hadoop_user}.keytab
    # database path of the table
    case "$tb" in
        flat_tld_header)
            dir1="dw/kfc"
            ;;
        mcnt_itemcode_key_info)
            dir1="ods/all"
            ;;
        ph_recsys_orders_hit|ph_recsys_tradeup_filter_results)
            dir1="aie_phdelivery.db"
            ;;
        *)
            dir1="aie_recommendation.db"
            ;;
    esac
    # partition key
    case "$tb" in
        mcnt_itemcode_key_info)
            partkey="p_date_key"
            ;;
        kfc_recsys_orders_hit|kfc_recsys_tradeup_filter_results|ph_recsys_orders_hit|ph_recsys_tradeup_filter_results)
            partkey="biz_date"
            ;;
        *)
            partkey="p_biz_date"
            ;;
    esac
    # partition value
    case "$tb" in
        kfc_recsys_product_supply_daily|mcnt_itemcode_key_info)
            partvalue=${onedayago}
            ;;
        flat_tld_header)
            partvalue=${twodayago}
            ;;
        kfc_recsys_orders_hit|kfc_recsys_tradeup_filter_results|ph_recsys_orders_hit|ph_recsys_tradeup_filter_results)
            partvalue=${twodayago_format}
            ;;
        *)
            partvalue=${threedayago}
            ;;
    esac
    # check that the partition exists on the source cluster before copying
    ${hdfs_cmd} dfs -ls ${src_hdfspath}/${dir1}/${tb}/${partkey}=${partvalue} > /dev/null 2>&1
    if [ $? -eq 0 ];then
        echo "partition ${partkey}=${partvalue} of table $tb exists"
        echo "start to copy partition ${partkey}=${partvalue} of table $tb"
        ${hadoop_cmd} distcp -D mapreduce.job.queuename=root.kp.recomend -D ipc.client.fallback-to-simple-auth-allowed=true -bandwidth 10 -m 10 ${src_hdfspath}/${dir1}/${tb}/${partkey}=${partvalue} ${target_hdfspath}/${dir1}/${tb}/ >> ${logsdir}/distcp_logs/distcp_`date +%F`.log 2>&1
        if [ $? -eq 0 ];then
            echo "copy partition $tb ${partkey}=${partvalue} ok" >> ${logsdir}/cp_parti_log/${tb}_parti_`date +%F`.ok
            echo "copy partition $tb ${partkey}=${partvalue} succeeded"
        else
            echo "copy partition $tb ${partkey}=${partvalue} error" >> ${logsdir}/cp_parti_log/${tb}_parti_`date +%F`.err
            echo "copy partition $tb ${partkey}=${partvalue} failed"
            sh ${curdir}/message2wechat.sh -u 'oBLAUwMj09FxXDiljGGUzy7nyIJ8,oBLAUwKPZ-KqWSmGWPHEFORbkxjo,oBLAUwB4-72Y-3XwTyoKk4SAVlWo,oBLAUwNFATpsFx4rM1s9TTdTihuk' -s "copy hive partition failed" -n "hive table copy" -t `date +%F_%T` -d "copy partition of hive table $tb ${partkey}=${partvalue} failed" -l "http://m.baidu.com"
        fi
    else
        echo "partition ${partkey}=${partvalue} of table $tb does not exist" >> ${logsdir}/cp_parti_log/${tb}_parti_`date +%F`.err
        sh ${curdir}/message2wechat.sh -u 'oBLAUwMj09FxXDiljGGUzy7nyIJ8,oBLAUwKPZ-KqWSmGWPHEFORbkxjo,oBLAUwB4-72Y-3XwTyoKk4SAVlWo,oBLAUwNFATpsFx4rM1s9TTdTihuk' -s "copy hive partition failed" -n "hive table copy" -t `date +%F_%T` -d "partition of hive table $tb ${partkey}=${partvalue} does not exist" -l "http://m.baidu.com"
    fi
done
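schedule.sh takes an optional YYYYMMDD date argument and defaults to today, so it is meant to be run once per day. A minimal sketch of a crontab entry on kunpeng1 that could drive the whole chain (the 06:00 time and the log file name are assumptions, not part of the original setup):

# hypothetical crontab entry: run the full copy-and-load chain daily at 06:00
# (schedule time and log name assumed; no date argument, so startday defaults to today)
0 6 * * * sh /root/qrj/to-leap/update-hive/schedule.sh >> /root/qrj/to-leap/update-hive/logs/schedule_cron.log 2>&1

The distcp target uses webhdfs:// with ipc.client.fallback-to-simple-auth-allowed=true, which is the usual way to copy from a kerberized cluster to a non-kerberized one across Hadoop versions.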
[root@kunpeng1 update-hive]# cat /root/qrj/to-leap/update-hive/hive-cp-update-part3.sh
#!/bin/bash
# define the hdfs and hadoop command paths
hdfs_cmd=/mnt/primary-hadoop-client/hadoop-2.6.0-cdh5.15.1/bin/hdfs
hadoop_cmd=/mnt/primary-hadoop-client/hadoop-2.6.0-cdh5.15.1/bin/hadoop
export JAVA_HOME=/opt/java1.8/jdk

# source and target hdfs paths; change them to match the real cluster layout
src_hdfspath="hdfs://yumcluster/apps/hive/warehouse"
target_hdfspath="webhdfs://172.25.203.76:50070/apps/hive/warehouse"
hadoop_user=srv_kp_recommend

# tables to copy
tbs=(
recsys_fusion_proxylog_tradeup_shoppingcart_parquet
recsys_fusion_proxylog_tradeup_shoppingcart_itemlinkids_parquet
recsys_fusion_order_parquet
recsys_fusion_order_orderitems_parquet
recsys_fusion_store_product_linkids_parquet
recsys_fusion_recommend_filter_list_parquet
recsys_fusion_recommend_filter_parquet
recsys_fusion_tradeup_product_parquet
)

# resolve the directory this script lives in
scriptpath=$0
if [ "${scriptpath:0:1}" = "/" ];then
    curdir=`dirname ${scriptpath}`
else
    curdir="`pwd`"/"`dirname ${scriptpath}`"
fi
echo "the execute dir path is $curdir"

# create the log dirs
logsdir="${curdir}/logs"
if [ ! -d ${logsdir}/distcp_logs ];then
    mkdir -p ${logsdir}/distcp_logs
fi
if [ ! -d ${logsdir}/cp_parti_log ];then
    mkdir -p ${logsdir}/cp_parti_log
fi

# kerberos auth
kinit ${hadoop_user}@CN.YUMCHINA.COM -kt ${curdir}/${hadoop_user}.keytab

# reference date: first argument if given, otherwise today
if [ -n "$1" ];then
    startday=$1
else
    startday=`date "+%Y%m%d"`
fi
echo "startday is $startday"
twodayago=`date -d "$startday -2 day" "+%Y-%m-%d"`
year=${twodayago:0:4}
month=${twodayago:5:2}
day=${twodayago:8:2}
# strip leading zeros so the values match the year=/month=/day= partition dir names
if [ ${month:0:1} -eq 0 ];then
    mon=${month:1:1}
else
    mon=$month
fi
if [ ${day:0:1} -eq 0 ];then
    da=${day:1:1}
else
    da=$day
fi

echo "there are ${#tbs[*]} tables waiting to copy"
for tb in ${tbs[*]}
do
    # database path of the table
    case "$tb" in
        flat_tld_header)
            dir1="dw/kfc"
            ;;
        mcnt_itemcode_key_info)
            dir1="ods/all"
            ;;
        *)
            dir1="aie_recommendation.db"
            ;;
    esac
    # create the month dir on the target cluster first if it is missing
    ssh 172.25.203.76 "su - hive -c 'hdfs dfs -ls hdfs://172.25.203.76:8020/apps/hive/warehouse/${dir1}/${tb}/year=${year}/month=${mon}'" > /dev/null 2>&1
    if [ $? -ne 0 ];then
        echo "create the month dir"
        ssh 172.25.203.76 "su - hive -c 'hdfs dfs -mkdir -p hdfs://172.25.203.76:8020/apps/hive/warehouse/${dir1}/${tb}/year=${year}/month=${mon}'"
    fi
    # check that the partition exists on the source cluster before copying
    ${hdfs_cmd} dfs -ls ${src_hdfspath}/${dir1}/${tb}/year=${year}/month=${mon}/day=${da} > /dev/null 2>&1
    if [ $? -eq 0 ];then
        echo "start to copy partition year=${year}/month=${mon}/day=${da} of table $tb"
        ${hadoop_cmd} distcp -D mapreduce.job.queuename=root.kp.recomend -D ipc.client.fallback-to-simple-auth-allowed=true -bandwidth 10 -m 10 ${src_hdfspath}/${dir1}/${tb}/year=${year}/month=${mon}/day=${da} ${target_hdfspath}/${dir1}/${tb}/year=${year}/month=${mon}/ >> ${logsdir}/distcp_logs/distcp_`date +%F`.log 2>&1
        if [ $? -eq 0 ];then
            echo "copy partition year=${year}/month=${mon}/day=${da} of table $tb ok" >> ${logsdir}/cp_parti_log/${tb}_parti_`date +%F`.ok
            echo "copy partition year=${year}/month=${mon}/day=${da} of table $tb succeeded"
        else
            echo "copy partition year=${year}/month=${mon}/day=${da} of table $tb error" >> ${logsdir}/cp_parti_log/${tb}_parti_`date +%F`.err
            echo "copy partition year=${year}/month=${mon}/day=${da} of table $tb failed"
            sh ${curdir}/message2wechat.sh -u 'oBLAUwMj09FxXDiljGGUzy7nyIJ8,oBLAUwKPZ-KqWSmGWPHEFORbkxjo,oBLAUwB4-72Y-3XwTyoKk4SAVlWo,oBLAUwNFATpsFx4rM1s9TTdTihuk' -s "copy hive partition failed" -n "hive table copy" -t `date +%F_%T` -d "copy partition of hive table $tb year=${year}/month=${mon}/day=${da} failed" -l "http://m.baidu.com"
        fi
    else
        echo "partition year=${year}/month=${mon}/day=${da} of table $tb does not exist" >> ${logsdir}/cp_parti_log/${tb}_parti_`date +%F`.err
        sh ${curdir}/message2wechat.sh -u 'oBLAUwMj09FxXDiljGGUzy7nyIJ8,oBLAUwKPZ-KqWSmGWPHEFORbkxjo,oBLAUwB4-72Y-3XwTyoKk4SAVlWo,oBLAUwNFATpsFx4rM1s9TTdTihuk' -s "copy hive partition failed" -n "hive table copy" -t `date +%F_%T` -d "partition of hive table $tb year=${year}/month=${mon}/day=${da} does not exist" -l "http://m.baidu.com"
    fi
done
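The substring-and-strip logic above works, but GNU date can produce the unpadded month and day directly with the %-m and %-d format specifiers. A minimal equivalent sketch (the startday value is just an example):

#!/bin/bash
# equivalent of the leading-zero stripping above, using GNU date's
# padding-suppressing format specifiers; startday is assumed to be YYYYMMDD
startday=20240115                            # example reference date
year=`date -d "$startday -2 day" "+%Y"`      # 2024
mon=`date -d "$startday -2 day" "+%-m"`      # 1  (no leading zero)
da=`date -d "$startday -2 day" "+%-d"`       # 13
echo "year=${year}/month=${mon}/day=${da}"   # year=2024/month=1/day=13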
[root@mltraining01 qrj]# cat load-hive-part.sh
#!/bin/bash
tbs=(
flat_tld_header
recsys_fusion_proxylog_tradeup_shoppingcart_parquet
recsys_fusion_proxylog_tradeup_shoppingcart_itemlinkids_parquet
kfc_recsys_product_supply_daily
recsys_fusion_order_parquet
recsys_fusion_order_orderitems_parquet
mcnt_itemcode_key_info
kfc_recsys_tradeup_stats_daily
kfc_recsys_orders_hit
kfc_recsys_tradeup_filter_results
recsys_fusion_store_product_linkids_parquet
recsys_fusion_recommend_filter_list_parquet
recsys_fusion_recommend_filter_parquet
recsys_fusion_tradeup_product_parquet
kfc_recsys_tradeup_city_recall_daily
kfc_recsys_tradeup_user_recall_daily
ph_recsys_orders_hit
ph_recsys_tradeup_filter_results
)
for tb in ${tbs[*]}
do
    # database of the table
    case "$tb" in
        flat_tld_header)
            db="dw_kfc"
            ;;
        mcnt_itemcode_key_info)
            db="ods_all"
            ;;
        ph_recsys_orders_hit|ph_recsys_tradeup_filter_results)
            db="aie_phdelivery"
            ;;
        *)
            db="aie_recommendation"
            ;;
    esac
    # register the newly copied partition dirs in the Hive metastore
    beeline -u "jdbc:hive2://172.25.203.76:10000/${db}" -n hive -e "MSCK REPAIR TABLE ${tb};" > /root/qrj/load_hive_log/${tb}_`date +%F`.log 2>&1
done
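MSCK REPAIR TABLE scans the table's HDFS location and adds any partition directories that are missing from the metastore, which is why the distcp scripts only need to land the files. A quick hedged way to confirm a table picked up the new partition, reusing the same connection string from the script (the table name here is just one example from the list):

# check the latest registered partitions after the repair run
beeline -u "jdbc:hive2://172.25.203.76:10000/aie_recommendation" -n hive \
    -e "SHOW PARTITIONS kfc_recsys_tradeup_stats_daily;" | tail -5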
