Hive实战——标签表造数据之case when then
背景:
测试环境需要往标签表里面灌入标签数据,
需求:
短期流失,中期流失,唤醒 ,沉睡 随机20%用户
然后6月30号 5月31号 各补一次
代码:
----------随机20%用户
select * from wolf_7uxfyecbufj1xuqp.tb_user tablesample(20 percent)
------------流失唤醒标签
SET HIVE.EXEC.DYNAMIC.PARTITION=TRUE;
set hive.exec.dynamic.partition.mode=nonstrict;
insert overwrite table wolf.dt_user_tag partition(times,project_id,import_source,scenario_code)
select
'流失唤醒' as label,
case when substr(a.dt_id ,6,1)+substr(a.dt_id ,7,1) <=6 then '沉睡'
when substr(a.dt_id ,6,1)+substr(a.dt_id ,7,1) >6 and substr(a.dt_id ,6,1)+substr(a.dt_id ,7,1) <=15 then '中期流失'
when substr(a.dt_id ,6,1)+substr(a.dt_id ,7,1) >15 and substr(a.dt_id ,6,1)+substr(a.dt_id ,7,1) <=16 then '短期流失'
else '唤醒'
end as value,
a.dt_id as dt_id,
unix_timestamp(date_add(current_date,-1),'yyyy-MM-dd')*1000 as times,
'7uXfyecbUfj1XUQP' as project_id,
'u_life' as import_source,
'UID' as scenario_code
from
(select
dt_id
from wolf_7uxfyecbufj1xuqp.tb_user tablesample(20 percent))a;
-------------补5月31号
SET HIVE.EXEC.DYNAMIC.PARTITION=TRUE;
set hive.exec.dynamic.partition.mode=nonstrict;
insert overwrite table wolf.dt_user_tag partition(times,project_id,import_source,scenario_code)
select
'流失唤醒' as label,
case when substr(a.dt_id ,6,1)+substr(a.dt_id ,7,1) <=6 then '沉睡'
when substr(a.dt_id ,6,1)+substr(a.dt_id ,7,1) >6 and substr(a.dt_id ,6,1)+substr(a.dt_id ,7,1) <=15 then '中期流失'
when substr(a.dt_id ,6,1)+substr(a.dt_id ,7,1) >15 and substr(a.dt_id ,6,1)+substr(a.dt_id ,7,1) <=16 then '短期流失'
else '唤醒'
end as value,
a.dt_id as dt_id,
unix_timestamp(2022-05-31,'yyyy-MM-dd')*1000 as times,
'7uXfyecbUfj1XUQP' as project_id,
'u_life' as import_source,
'UID' as scenario_code
from
(select
dt_id
from wolf_7uxfyecbufj1xuqp.tb_user tablesample(20 percent))a ;
-------------补6月30号
SET HIVE.EXEC.DYNAMIC.PARTITION=TRUE;
set hive.exec.dynamic.partition.mode=nonstrict;
insert overwrite table wolf.dt_user_tag partition(times,project_id,import_source,scenario_code)
select
'流失唤醒' as label,
case when substr(a.dt_id ,6,1)+substr(a.dt_id ,7,1) <=6 then '沉睡'
when substr(a.dt_id ,6,1)+substr(a.dt_id ,7,1) >6 and substr(a.dt_id ,6,1)+substr(a.dt_id ,7,1) <=15 then '中期流失'
when substr(a.dt_id ,6,1)+substr(a.dt_id ,7,1) >15 and substr(a.dt_id ,6,1)+substr(a.dt_id ,7,1) <=16 then '短期流失'
else '唤醒'
end as value,
a.dt_id as dt_id,
unix_timestamp(2022-06-30,'yyyy-MM-dd')*1000 as times,
'7uXfyecbUfj1XUQP' as project_id,
'u_life' as import_source,
'UID' as scenario_code
from
(select
dt_id
from wolf_7uxfyecbufj1xuqp.tb_user tablesample(20 percent))a ;
知识点:
1.随机百分比函数
tablesample(20 percent)
这个函数还有其他随机的抽取