连续登录天数及其变种

样例数据

点击查看代码
insert overwrite table test.ysj_lianxu_not 
  select 'a',        't1'
  union all
  select 'a',        't2'
  union all
  select 'a',        't3'
  union all 
  select 'b',         't3'
  union all
  select 'a',        't4'
  union all
  select 'a',        't5'
  union all
  select 'a',        't6'
  union all
  select 'a',        't7'
;

连续登录超过4天的用户

点击查看代码
SELECT diff,count(DISTINCT user_id ) d_ct
from (
SELECT user_id,click_time,date_sub(click_time,rk) as diff 
from (
SELECT user_id,click_time,row_number() over(PARTITION BY user_id ORDER BY click_time ) as rk 
from test.ysj_lianxu_not
) aa
) temp 
GROUP BY diff 
having count(DISTINCT user_id ) >=4
;

连续登录用户,且中间没有其他用户登录

  • 第一种方式
点击查看代码
select  user_id,diff,count(*)
from
(select user_id,rk_1-rk_2 diff
from
(select user_id
,row_number() over(order by click_time asc ) rk_1
,row_number() over(partition by user_id order by click_time asc ) rk_2
from test.ysj_lianxu_not
) b
) c
group by user_id,diff
having count(*)=4
;

第一种方式有以下的问题,应该用第二种方法

点击查看代码
--连续登陆天数的用户,并且中间没有其他用户登陆,此方法有问题,如果两个用户a和B,a用户和b用户在同一天的全局排序的顺序不同,则会造成误差
-- 	user_id	click_time	rk_1	rk_2
-- 	a	     t1	         1	     1
-- 	a	     t2	         2	     2
-- 	b	     t3	         3	     1
-- 	a	     t3	         4	     3 比如t3这一天,a排在b的后边,则会造成这一天数据和之后的日期连续,计算出来的a的连续登陆天数就会多一天
-- 	a	     t4	         5	     4
-- 	a	     t5	         6	     5
-- 	a	     t6	         7	     6
-- 	a	     t7	         8	     7
	

  • 第二种方式:
点击查看代码
WITH temp as (
SELECt click_time
from (
SELECT click_time,count(distinct user_id) as ct 
from test.ysj_lianxu_not
GROUP BY click_time
) aa
where ct = 1 
)
select  diff,user_id,count(*)
from (
select user_id,cast(replace(click_time,'t','') as int) - rk as diff 
from (
        SELECT  t2.user_id,t1.click_time,row_number() over(PARTITION BY t2.user_id ORDER BY t1.click_time ASC) as rk 
        from temp t1
        left join test.ysj_lianxu_not t2
        on t1.click_time = t2.click_time
        ) aa 
) bb 
GROUP BY diff,user_id
;

连续登录天数,允许间隔3天
间隔见天都可以,只需要将sql中的space函数的参数改成对应的天数减1就能满足

点击查看代码
with temp as (
select 111 as uid,'2023-01-01' as dt 
union all
select 111 as uid,'2023-01-03' as dt 
union all
select 111 as uid,'2023-01-05' as dt 
union all
select 111 as uid,'2023-01-06' as dt 
union all
select 111 as uid,'2023-01-09' as dt 
union all
select 111 as uid,'2023-01-10' as dt 
union all
select 111 as uid,'2023-01-11' as dt 
union all
select 111 as uid,'2023-01-13' as dt 
union all
select 111 as uid,'2023-01-17' as dt 
union all
select 111 as uid,'2023-01-19' as dt 
union all
select 111 as uid,'2023-01-20' as dt 
union all
select 222 as uid,'2023-01-01' as dt 
union all
select 222 as uid,'2023-01-02' as dt 
union all
select 222 as uid,'2023-01-05' as dt 
union all
select 222 as uid,'2023-01-07' as dt 
union all
select 222 as uid,'2023-01-08' as dt 
),
temp2 as (
        select *,date_add(tt.dt,sp.pos) as last_dt
        from temp  tt
        lateral view posexplode(split(space(2),'')) sp 
) ,
temp3 as (
        select uid,last_dt
        from temp2
        group by last_dt,uid
) 
select uid,diff,count(*) as ct 
from (
        select uid,date_sub(last_dt,rk) as diff
        from (
                select uid,last_dt,row_number() over(partition by uid order by last_dt ) as rk
                from temp3
        ) result 
) ed
group by uid,diff 
posted @ 2024-06-20 20:40  A1340399426  阅读(37)  评论(0)    收藏  举报