小偷程序

#!/bin/bash
#===============================================================
# desc : 定期移动文件到HDFS目录下
# Script Name : mv clickLog File to HDFS
# Code By : frank
# mail : maoxiangyi@jd.com;anjianbing@jd.com
#===============================================================


# Static configuration: source directory of raw click logs, local staging
# directory, prefix used for staged file names, working log directory,
# start/stop scripts run via sh, and the table directory name used in the
# HDFS target path.
src_Dir="/export/webClick/"
tmp_Dir="/export/webClickTmp/"
file_prefix="WebClick_"
log_dir="/export/server/real_platform/web_click_log_to_local_file/log/"
startShell="/export/server/real_platform/web_click_log_to_local_file/shell/start.sh"
stopShell="/export/server/real_platform/web_click_log_to_local_file/shell/stop.sh"
hdfs_table="s_h02_click_log"

# Values captured once at script start and shared by the functions below:
#   ftpHomeDate - current date as YYYYMMDD
#   tag         - current time as HH:MM
#   executeTime - timestamp embedded in staged file names and work-file names
#   tagM        - current minute
#   hostname    - output of the hostname command
ftpHomeDate=$(date +%Y%m%d)
tag=$(date +%H:%M)
executeTime=$(date +%Y_%m_%d_%H_%M_%S)
tagM=$(date +%M)
hostname=$(hostname)

# Create a directory (including missing parents) if it does not exist yet.
# Arguments:
#   $1 - path of the directory to create
# Returns:
#   0 when the directory already exists or was created,
#   1 when no path was given, otherwise the exit status of mkdir.
function createDir() {
  if [ -z "$1" ]; then
    echo "createDir: missing directory argument" >&2
    return 1
  fi
  if [ ! -d "$1" ]; then
    # -p: also create missing parent directories; --: path may start with '-'.
    mkdir -p -- "$1"
  fi
}

# Start the click-stream receiver by running the script named in $startShell.
# Globals:
#   startShell (read) - path of the start script, executed with sh
# Outputs:
#   progress messages on stdout, a diagnostic on stderr if the script fails
# Returns:
#   exit status of the start script
function startScript() {
  local rc=0
  # The messages used to say "停止" (stop), copied from stopScript.
  echo "启动点击流接受脚本。"
  sh "$startShell" || rc=$?
  if [ "$rc" -ne 0 ]; then
    echo "startScript: $startShell exited with status $rc" >&2
  fi
  echo "启动点击流接受脚本。"
  echo ""
  return "$rc"
}

# Stop the click-stream receiver by running the script named in $stopShell.
# Globals:
#   stopShell (read) - path of the stop script, executed with sh
# Outputs:
#   progress messages on stdout, a diagnostic on stderr if the script fails
# Returns:
#   exit status of the stop script
function stopScript() {
  local rc=0
  echo "停止点击流接受脚本。"
  # Quoted so that a path containing whitespace is passed as one argument.
  sh "$stopShell" || rc=$?
  if [ "$rc" -ne 0 ]; then
    echo "stopScript: $stopShell exited with status $rc" >&2
  fi
  echo "停止点击流接受脚本。"
  echo ""
  return "$rc"
}

# Move matching raw files out of the source directory into a per-day staging
# directory, then upload every moved file to the HDFS partition directory.
# Globals (all read):
#   log_dir, tmp_Dir, src_Dir, hostname, file_prefix, executeTime, hdfs_table
# Arguments:
#   $1 - per-day sub-directory name, used under both log_dir and tmp_Dir
#   $2 - grep pattern selecting file names inside src_Dir
#   $3 - value of the dt= partition in the HDFS target path
# Outputs:
#   progress messages on stdout, diagnostics on stderr
# Returns:
#   1 when an argument is missing or a working directory cannot be created.
function scan_and_put() {
  if [ -z "$1" ] || [ -z "$2" ] || [ -z "$3" ]; then
    echo "scan_and_put: usage: scan_and_put <date_dir> <pattern> <dt>" >&2
    return 1
  fi

  local stage_dir="${tmp_Dir}$1"
  # Work file listing the staged files produced by this run.
  local moved_list="${log_dir}$1/${executeTime}.log"
  local hdfs_dir="/apps/hive/warehouse/stage.db/${hdfs_table}/dt=$3/"
  local line staged_file

  createDir "${log_dir}$1" || return 1
  createDir "$stage_dir" || return 1

  echo "扫描原始文件,将满足需求的文件mv到ftphome目录下"
  ls "$src_Dir" | grep -- "$2" | while IFS= read -r line; do
    staged_file="${stage_dir}/${hostname}_${file_prefix}${executeTime}_${line##*/}"
    # Record the file only when the move succeeded, so the upload loop below
    # never receives a path that does not exist.
    if mv -- "${src_Dir}${line}" "$staged_file"; then
      echo "$staged_file" >> "$moved_list"
    else
      echo "scan_and_put: failed to move ${src_Dir}${line}" >&2
    fi
  done
  echo "扫描完成..."

  # Nothing was moved in this run.
  [ -f "$moved_list" ] || return 0

  while IFS= read -r line; do
    echo "本次上传文件:$line"
    echo "hadoop命令:hadoop fs -put $line $hdfs_dir"
    # stdin is redirected so the command cannot consume the list being read.
    if ! hadoop fs -put "$line" "$hdfs_dir" < /dev/null; then
      echo "scan_and_put: upload failed for $line" >&2
    fi
    echo "上传文件结束.$line"
  done < "$moved_list"
  rm -f -- "$moved_list"
}

# Regular upload pass, dispatched on the start time held in $tag.
#   - tag is 00:00 or 00:01: stop the receiver, scan with pattern "data.lo"
#     into yesterday's directory/partition, then start the receiver again.
#   - any other time: scan with pattern "data.log." into today's
#     directory/partition.
function normal_sacn_and_put() {
  case "$tag" in
    "00:00" | "00:01")
      stopScript
      condition="data.lo"
      yesterday_dt=$(date --date='yesterday' +%Y-%m-%d)
      yesterday_ftp_date=$(date --date='yesterday' +%Y%m%d)
      scan_and_put "$yesterday_ftp_date" "$condition" "$yesterday_dt"
      startScript
      ;;
    *)
      condition="data.log."
      today_dt=$(date +%Y-%m-%d)
      today_ftp_date=$(date +%Y%m%d)
      scan_and_put "$today_ftp_date" "$condition" "$today_dt"
      ;;
  esac
}

# Retry the upload of staged files that are missing from an HDFS partition.
# Lists the partition directory, compares the base names found there with the
# files in the local staging directory for the day, sends an SMS alert when
# anything is missing, and uploads the missing files again.
# Globals (all read):
#   log_dir, executeTime, hdfs_table, tmp_Dir, file_prefix, hostname
# Arguments:
#   $1 - value of the dt= partition in the HDFS path
#   $2 - per-day staging sub-directory name under tmp_Dir
# Outputs:
#   progress messages on stdout, diagnostics on stderr
# Returns:
#   1 when an argument is missing.
function put_local_file_to_hdfs() {
  if [ -z "$1" ] || [ -z "$2" ]; then
    echo "put_local_file_to_hdfs: usage: put_local_file_to_hdfs <dt> <date_dir>" >&2
    return 1
  fi

  local hdfs_dir="/apps/hive/warehouse/stage.db/${hdfs_table}/dt=$1/"
  local stage_dir="${tmp_Dir}$2"
  local diffFile="${log_dir}differ_file${executeTime}.log"
  local hdfs_file_list="${log_dir}hdfs_file_list${executeTime}.log"
  local local_file_list="${log_dir}local_file_list${executeTime}.log"
  local line file_name file_size tarFile

  # Always start from an existing, empty list. Without it an empty (or
  # unlistable) partition left the file missing, grep -f failed, and no file
  # was ever retried.
  : > "$hdfs_file_list"
  hadoop fs -ls "$hdfs_dir" | awk '{print $8}' | while IFS= read -r line; do
    file_name=${line##*/}
    # A file that is still being uploaded is treated as already uploaded.
    if [ "${file_name##*.}" = "_COPYING_" ]; then
      echo "此文件正在上传:$line"
      file_name=${file_name%.*}
    fi
    # Lines without an 8th field (e.g. the listing header) yield an empty name.
    if [ -n "$file_name" ]; then
      echo "$file_name" >> "$hdfs_file_list"
    fi
  done

  ls "$stage_dir" | grep -- "$file_prefix" > "$local_file_list"

  # Local names that have no exact whole-line match in the HDFS list.
  grep -vxFf "$hdfs_file_list" "$local_file_list" > "$diffFile"

  rm -f -- "$hdfs_file_list" "$local_file_list"

  file_size=$(wc -l < "$diffFile")
  echo "当前重试的文件个数:$file_size"
  if (( file_size >= 1 )); then
    # At least one file needs a retry: trigger the SMS alert.
    #source ~/.base_profile
    if /soft/java/bin/java -jar /export/server/real_platform/sendmsg/sendmsg.jar "15652306418,18211153576" "${hostname}机器下有 ${file_size} 个文件正在重试上传,小偷程序遇到问题了,请查看!"; then
      echo "发送短信成功!"
    else
      echo "put_local_file_to_hdfs: sending the SMS alert failed" >&2
    fi
  fi

  while IFS= read -r line; do
    tarFile="${stage_dir}/${line}"
    echo "开始上传文件:$tarFile"
    echo "hadoop命令:hadoop fs -put $tarFile $hdfs_dir"
    # stdin is redirected so the command cannot consume the list being read.
    if ! hadoop fs -put "$tarFile" "$hdfs_dir" < /dev/null; then
      echo "put_local_file_to_hdfs: upload failed for $tarFile" >&2
    fi
    echo "上传文件结束:$tarFile"
  done < "$diffFile"
  rm -f -- "$diffFile"
}



# Retry pass for files whose earlier upload did not reach HDFS.
#   - tag is 00:00 or 00:01: re-check yesterday's partition/staging directory.
#   - tagM is 40 or 41: re-check today's partition/staging directory.
# The two checks are independent of each other.
function failOver() {
  echo "重试机制启动...."

  # Day-rollover handling.
  case "$tag" in
    "00:00" | "00:01")
      yesterday_dt=$(date --date='yesterday' +%Y-%m-%d)
      yesterday_ftp_date=$(date --date='yesterday' +%Y%m%d)
      put_local_file_to_hdfs "$yesterday_dt" "$yesterday_ftp_date"
      ;;
  esac

  # Periodic handling, triggered at minute 40 or 41.
  case "$tagM" in
    "40" | "41")
      today_dt=$(date +%Y-%m-%d)
      today_ftp_date=$(date +%Y%m%d)
      put_local_file_to_hdfs "$today_dt" "$today_ftp_date"
      ;;
  esac

  echo "重试机制执行完毕..."
}


# Entry point: print a start banner, run the regular upload pass, run the
# retry pass, then print an end banner. Each banner carries the current time.
function main() {
  echo ""
  echo "脚本开始执行,开始时间:$(date +%Y:%m:%d_%H:%M:%S)"

  # Regular upload of newly found files.
  normal_sacn_and_put
  # Retry of files that were not uploaded successfully.
  failOver

  echo "脚本执行完毕,结束时间:$(date +%Y:%m:%d_%H:%M:%S)"
  echo ""
}

main

posted on 2019-12-19 15:36  bdcyouth  阅读(57)  评论(0)  编辑  收藏

导航

统计