hadoop之hive高级操作

在输出结果较多,需要输出到文件中时,可以在hive CLI之外执行hive -e "sql" > output.txt操作

但当SQL语句太长或太多时,这种方式不是很方便,可以考虑将SQL语句存为sql.hql文件中,然后执行 hive -f sql.hql >output.txt操作

如果是多个语句,且要输出到多个文件,只好把SQL写在shell脚本中,下面附一个例子

start_day=$1
end_day=$2
start_date=`date  +"%Y-%m-%d" -d  "${start_day}"`
end_date=`date  +"%Y-%m-%d" -d  "${end_day}"`


active="
use ycappdata;
select ctl_dt,'active' ,count(distinct dvid) from sa_daydau_detail 
where ctl_dt between '${start_date}' and '${end_date}'
group by ctl_dt,'active' ;"



loss="
use ycappdata;
select date_add(from_unixtime(unix_timestamp(lastactivedate,'yyyy/MM/dd hh:mm:ss'),'yyyy-MM-dd'),15),'loss' ,count(distinct deviceid) from ext_db_apploginstats
where from_unixtime(unix_timestamp(lastactivedate,'yyyy/MM/dd hh:mm:ss'),'yyyy-MM-dd') between date_sub('${start_date}',15) and date_sub('${end_date}',15)
group by date_add(from_unixtime(unix_timestamp(lastactivedate,'yyyy/MM/dd hh:mm:ss'),'yyyy-MM-dd'),15),'loss';"


active_month_distribute="
use ycappdata;
select a.ctl_dt,'active_month_distribute',concat('m',month(start_dt)),count(distinct b.dvid) from 
(select ctl_dt,dvid from sa_daydau_detail where ctl_dt between '${start_date}' and '${end_date}')a 
left outer join 
(select start_dt,dvid from sa_firststartdate_dvid where start_dt between '2017-01-01' and '${end_date}')b 
on lower(a.dvid)=lower(b.dvid)
group by a.ctl_dt,'active_month_distribute',concat('m',month(start_dt)) ;"

active_date_distribute="
use ycappdata;
select a.ctl_dt,'active_date_distribute',
case when datediff(a.ctl_dt,b.start_dt)=0 then 'd0' when datediff(a.ctl_dt,b.start_dt)<=30 then 'd30'
when datediff(a.ctl_dt,b.start_dt)<=60 then 'd60'  when datediff(a.ctl_dt,b.start_dt)<=90 then 'd90'
when datediff(a.ctl_dt,b.start_dt)<=120 then 'd120' when datediff(a.ctl_dt,b.start_dt)<=150 then 'd150'
when datediff(a.ctl_dt,b.start_dt)<=180 then 'd180' else 'd181' end,count(distinct b.dvid) from 
(select ctl_dt,dvid from sa_daydau_detail where ctl_dt between '${start_date}' and '${end_date}')a 
left outer join 
(select start_dt,dvid from sa_firststartdate_dvid where start_dt between '2017-01-01' and '${end_date}')b 
on lower(a.dvid)=lower(b.dvid)
group by a.ctl_dt,'active_date_distribute',case when datediff(a.ctl_dt,b.start_dt)=0 then 'd0'  when datediff(a.ctl_dt,b.start_dt)<=30 then 'd30'
when datediff(a.ctl_dt,b.start_dt)<=60 then 'd60'   when datediff(a.ctl_dt,b.start_dt)<=90 then 'd90'
when datediff(a.ctl_dt,b.start_dt)<=120 then 'd120' when datediff(a.ctl_dt,b.start_dt)<=150 then 'd150'
when datediff(a.ctl_dt,b.start_dt)<=180 then 'd180' else 'd181' end ;"



hive -e "${active}"                  >> app_operate.txt
hive -e "${loss}"                    >> app_operate.txt
hive -e "${active_month_distribute}" >> app_operate.txt
hive -e "${active_date_distribute}"  >> app_operate.txt


while [ ${start_day} -le ${end_day} ]
do
current_date=`date  +"%Y-%m-%d" -d  "${start_day}"`

week_active="
use ycappdata;
select '${current_date}','week_active',count(distinct dvid)  from sa_daydau_detail 
where ctl_dt between date_sub('${current_date}',pmod(datediff('${current_date}', '2017-01-02'), 7)) and '${current_date}'
group by '${current_date}','week_active';  "

month_active="
use ycappdata;
select '${current_date}','month_active',count(distinct dvid)  from sa_daydau_detail 
where ctl_dt between trunc('${current_date}','MM') and '${current_date}'
group by '${current_date}','month_active';  "

active_active_distribute="
use ycappdata;
select '${current_date}','active_active_distribute',concat('d',days),count(distinct ab.dvid) from 
(select b.dvid,count(distinct b.ctl_dt) as days from 
(select ctl_dt,dvid from sa_daydau_detail
where ctl_dt='${current_date}')a 
join 
(select ctl_dt,dvid from sa_daydau_detail
where ctl_dt between date_sub('${current_date}',14) and '${current_date}')b 
on  lower(a.dvid)=lower(b.dvid)
group by b.dvid )ab 
group by '${current_date}','active_active_distribute',concat('d',days);"

newuser_retain="
use ycappdata;
select a.start_dt,'newuser_retain',concat('d',datediff(b.ctl_dt,a.start_dt)),count(distinct b.dvid) from 
(select start_dt,dvid from sa_firststartdate_dvid
where start_dt between date_sub('${current_date}',30) and '${current_date}')a 
left outer join
(select ctl_dt,dvid from sa_daydau_detail 
where ctl_dt between date_sub('${current_date}',30) and '${current_date}')b 
on lower(a.dvid)=lower(b.dvid)
group by a.start_dt,'newuser_retain',concat('d',datediff(b.ctl_dt,a.start_dt)); "

active_retain="
use ycappdata;
select a.ctl_dt,'active_retain',concat('d',datediff(b.ctl_dt,a.ctl_dt)),count(distinct b.dvid) from 
(select ctl_dt,dvid from sa_daydau_detail 
where ctl_dt between date_sub('${current_date}',30) and '${current_date}')a 
left outer join
(select ctl_dt,dvid from sa_daydau_detail 
where ctl_dt between date_sub('${current_date}',30) and '${current_date}')b 
on lower(a.dvid)=lower(b.dvid)
where a.ctl_dt<=b.ctl_dt
group by a.ctl_dt,'active_retain',concat('d',datediff(b.ctl_dt,a.ctl_dt)); "





echo "${week_active}" 
echo "${month_active}" 
echo "${active_active_distribute}" 
echo "${newuser_retain}" 
echo "${active_retain}" 

hive -e "${week_active}"                 >> app_operate.txt
hive -e "${month_active}"                >> app_operate.txt
hive -e "${active_active_distribute}"    >> app_operate.txt
hive -e "${newuser_retain}"              >> app_operate.txt
hive -e "${active_retain}"               >> app_operate.txt
start_day=`date  +"%Y%m%d" -d  "${start_day} 1 days" `
done

 

posted @ 2018-01-09 16:50  Mars.wang  阅读(949)  评论(0编辑  收藏  举报