#!/bin/bash
# Aggregate driver work-time detail into the yearly-partitioned target table.
#
# Usage: <script> <begin_date> <end_date>
#   Dates are expected as 'YYYY-MM-dd ...'; only the first 4 characters of
#   begin_date (the year) select the source and target partition.
set -euo pipefail

begin_date=${1:-}
end_date=${2:-}
readonly target_table=bst_agg_car_driver_work_time_d

# Both dates are required; fail fast with a usage message instead of running
# the Hive job against an empty partition year.
if [[ -z "$begin_date" || -z "$end_date" ]]; then
  printf 'Usage: %s <begin_date> <end_date>\n' "${0##*/}" >&2
  exit 2
fi

# Partition key: the year component of begin_date.
v_dt_year=${begin_date:0:4}
echo "$v_dt_year,$begin_date,$end_date"
# Hive query (parameterised by $target_table / $v_dt_year):
#   1. Dedup dw_car_center.bst_bas_driver_info_work_time for partition
#      dt_year = $v_dt_year: ROW_NUMBER over (PARTITION BY id ORDER BY
#      etl_date DESC), keeping only rn = 1 (latest ETL row per id).
#   2. datemap(...) is a non-builtin UDTF applied via LATERAL VIEW; it emits
#      (begin_date, end_date) pairs per source row — presumably splitting each
#      work interval into per-day segments. TODO(review): confirm semantics.
#   3. business_no 'hailing' is normalised to 'CAR_HAILING';
#      date_second_diff is the segment length in seconds.
#   4. Driver and vehicle dimension tables are LEFT JOINed, and the result
#      overwrites the dt_year partition of $target_table.
# NOTE(review): the outer SELECT derives work_start_date/work_end_date from the
# original work_*_time columns rather than from the per-segment
# begin_date/end_date produced by datemap — verify this is intentional.
query_sql="
with tmp as
(select a.id,
a.driver_no,
CASE
WHEN a.business_no = 'hailing' THEN
'CAR_HAILING'
ELSE
a.business_no
END AS business_no,
a.work_vehicle_no,
a.work_start_time,
a.work_end_time,
begin_date,
end_date,
unix_timestamp(end_date, 'yyyy-MM-dd HH:mm:ss')- unix_timestamp(begin_date, 'yyyy-MM-dd HH:mm:ss') date_second_diff
from (SELECT *,
row_number() over(PARTITION BY t.id ORDER BY t.etl_date DESC) AS rn
FROM dw_car_center.bst_bas_driver_info_work_time t
WHERE dt_year = '${v_dt_year}') a lateral view datemap(date_format(work_start_time, 'yyyy-MM-dd HH:mm:ss'), date_format(work_end_time, 'yyyy-MM-dd HH:mm:ss')) t as begin_date,
end_date
where a.rn = 1
)
insert overwrite table ${target_table} partition(dt_year='${v_dt_year}')
SELECT date_format(dwt.work_start_time, 'yyyyMMdd') AS work_start_date,
date_format(dwt.work_end_time, 'yyyyMMdd') AS work_end_date,
df.provider_no,
df.supplier_no,
dwt.driver_no,
df.driver_name,
df.telephone,
dwt.business_no,
vf.vehicle_num,
vf.vehicle_no,
dwt.work_start_time,
dwt.work_end_time,
dwt.date_second_diff,
current_timestamp() as etl_date
FROM tmp dwt
LEFT JOIN dw_car_center.bst_par_car_service_driver_info df
ON dwt.driver_no = df.driver_no
LEFT JOIN dw_car_center.bst_par_car_service_vehicle_info vf
ON dwt.work_vehicle_no = vf.vehicle_no
"
echo 'start execute hive sql !'

# Session settings merge small output files; the partition is dropped and
# re-created so a re-run for the same year fully replaces its data.
# 'drop if exists' keeps the first run for a new year from failing.
hive_sql="
use dw_car_center;
set hive.exec.parallel=true;
set hive.input.format=org.apache.hadoop.hive.ql.io.CombineHiveInputFormat;
set hive.merge.mapfiles = true;
set hive.merge.mapredfiles=true;
set hive.merge.size.per.task = 256000000;
set hive.merge.smallfiles.avgsize=16000000;
alter table ${target_table} drop if exists partition(dt_year='${v_dt_year}');
alter table ${target_table} add partition(dt_year='${v_dt_year}');
${query_sql};
"

# Quote the expansion so the statement layout is preserved in the log
# (unquoted echo would collapse all whitespace and glob-expand).
printf '%s\n' "$hive_sql"

# Propagate hive's exit status instead of unconditionally reporting success.
if ! hive -e "$hive_sql"; then
  echo 'hive sql execute FAILED !' >&2
  exit 1
fi
echo 'hive sql is execute OK !'