需求 : 计算每个用户最大的连续登录天数,可以间隔一天。解释:如果一个用户在 1,3,5,6 登录游戏,则视为连续 6 天登录
-- 数据准备
-- DDL
create table test2 (
`user_id` string comment '用户id',
`login_date` string comment '登入日期',
`login_status` string comment '登入状态(1:成功,2:异常)')
comment '用户登入记录表'
row format delimited fields terminated by '\t'
lines terminated by '\n' stored as orc;
-- DML
insert overwrite table test2
select '1001' as id,'2021-12-12' as occur_date,'1' as lowcarbon union all
select '1001' as id,'2021-12-20' as occur_date,'1' as lowcarbon union all
select '1001' as id,'2022-02-10' as occur_date,'1' as lowcarbon union all
select '1001' as id,'2021-12-20' as occur_date,'0' as lowcarbon union all
select '1002' as id,'2021-12-12' as occur_date,'1' as lowcarbon union all
select '1001' as id,'2021-12-13' as occur_date,'1' as lowcarbon union all
select '1001' as id,'2021-12-13' as occur_date,'1' as lowcarbon union all
select '1001' as id,'2021-12-13' as occur_date,'1' as lowcarbon union all
select '1002' as id,'2021-12-14' as occur_date,'0' as lowcarbon union all
select '1001' as id,'2021-12-14' as occur_date,'1' as lowcarbon union all
select '1002' as id,'2021-12-15' as occur_date,'1' as lowcarbon union all
select '1001' as id,'2021-12-15' as occur_date,'1' as lowcarbon union all
select '1003' as id,'2021-12-15' as occur_date,'0' as lowcarbon union all
select '1003' as id,'2021-12-16' as occur_date,'1' as lowcarbon union all
select '1003' as id,'2021-12-17' as occur_date,'1' as lowcarbon union all
select '1003' as id,'2021-12-18' as occur_date,'1' as lowcarbon union all
select '1003' as id,'2021-12-29' as occur_date,'1' as lowcarbon union all
select '1001' as id,'2022-01-01' as occur_date,'1' as lowcarbon union all
select '1001' as id,'2022-01-01' as occur_date,'1' as lowcarbon union all
select '1001' as id,'2022-01-01' as occur_date,'1' as lowcarbon union all
select '1001' as id,'2022-01-03' as occur_date,'1' as lowcarbon union all
select '1001' as id,'2022-01-05' as occur_date,'1' as lowcarbon union all
select '1001' as id,'2022-01-06' as occur_date,'1' as lowcarbon union all
select '1003' as id,'2021-12-19' as occur_date,'1' as lowcarbon ;
-- 数据说明
-- 某游戏公司记录的用户每日登录数据
-- 每天每天可能有多次登入记录
-- 分析思路
2
4
6
7
8
10
11
13
14
怎样判断序列连续? 连续规则为间隔为1或者2 示例: 1 3 5 6 视为连续
获取前置元素差值法
1. 获取当前元素的前一位元素
2 null
4 2
6 4
7 6
8 7
10 8
11 10
13 11
14 13
2. 当前元素 与前置元素 做差
2 null null
4 2 2
6 4 2
7 6 1
8 7 1
10 8 2
11 10 1
13 11 2
14 13 1
3. 差值为 1、2的 视为连续(对中或队尾)标记为0, 否为视为队首,标记为1
2 null null 1
4 2 2 0
6 4 2 0
7 6 1 0
8 7 1 0
10 8 2 0
11 10 1 0
13 11 2 0
14 13 1 0
-- 执行sql
with t1 as (
select user_id
, login_date
, if(datediff(login_date,
lag(login_date) over (partition by user_id order by login_date asc)) in (0, 1, 2), 0, 1)
as if_contin
from test2
where login_status = 1
),
t2 as (
select
user_id
,login_date
,sum(if_contin) over(partition by user_id order by login_date asc) as contin_group
from t1),
t3 as (
select
user_id
,contin_group
,datediff(max(login_date),min(login_date)) + 1 as contin_days
from t2
group by user_id
,contin_group)
select
user_id
,max(contin_days)
from t3
group by user_id
;
user_id _c1
1001 6
1002 1
1003 4