1 ### 模板脚本存放路径(无需修改)
2 cd /tmp/fix_data/tmp_wjj_20180322_01
3 ### 脚本名称
4 script=tmp_wjj_20180322_01
5 ### 开始日期(包括当月/天)
6 etl_dt_start='2017-09-01'
7 ### 结束日期(不包括当月/天)
8 etl_dt_end='2016-12-01'
9 ### 并发数(请勿设置高于10)
10 thread_num=3
11 ### task数量(设置spark_sql的task数量,如果数据量过亿,可适量调高)
12 partitions=150
13 ### 以下无需修改
14 thread_no=0
15 spark="beeline -u jdbc:hive2://ip:port -n username -p password --verbose=true"
16 hive="hive -v"
17 excute=${spark}
18 ### 删除执行脚本内容
19 cat /dev/null > run_sql_${script}
20 ### to do: 根据进程数进行判断
21 while [[ ${etl_dt_start} > ${etl_dt_end} ]]
22 do
23 echo ${etl_dt_start}
24 etl_dt_start_str=`echo ${etl_dt_start}|sed 's/-/_/g'`
25 echo "select 'job_start' as flag,'${etl_dt_start}' as num,current_timestamp() as time;" > ${script}_${etl_dt_start_str}
26 echo "set spark.sql.shuffle.partitions=${partitions};" >> ${script}_${etl_dt_start_str}
27 echo "use db_name;" >> ${script}_${etl_dt_start_str}
28 echo "set mapred.job.queue.name=queue_name;" >> ${script}_${etl_dt_start_str}
29 echo "set hive.exec.dynamic.partition=true;" >> ${script}_${etl_dt_start_str}
30 echo "set hive.exec.dynamic.partition.mode=nonstrict;" >> ${script}_${etl_dt_start_str}
31 sed "s/2017-10-01/${etl_dt_start}/g" ${script} >> ${script}_${etl_dt_start_str}
32 echo "select 'job_finish' as flag,'${etl_dt_start}' as num,current_timestamp() as time;" >> ${script}_${etl_dt_start_str}
33
34 thread_no=$((${thread_no}+1))
35 echo "${excute} -f ${script}_${etl_dt_start_str} > ${script}_${etl_dt_start_str}_log 2>&1 &" >>run_sql_${script}
36 if [[ $((${thread_no}%${thread_num})) == 0 ]]
37 then
38 echo "wait" >>run_sql_${script}
39 fi
40 ### etl_dt_start=`date -d "+1 days ${etl_dt_start}" +%Y-%m-%d`
41 etl_dt_start=`date -d "-1 months ${etl_dt_start}" +%Y-%m-%d`
42 done
43
44 ### 执行初始化
45 sh run_sql_${script}
46
47 ### 查看执行时间
48 # grep -E "job_start|job_finish" ${script}_*_log|grep -v select