-- DDL
create table bktab (
team string comment '球队名称',
number int comment '球员号码',
score_time string comment '得分时间',
score int comment '得分分数',
name string comment '球员姓名')
comment 'AB球队得分流水表'
row format delimited fields terminated by '\t'
lines terminated by '\n' stored as orc;
-- DML
insert overwrite table bktab
select 'A',1,'2020/8/28 9:01:14',1,'A1' union all
select 'A',5,'2020/8/28 9:02:28',1,'A5' union all
select 'B',4,'2020/8/28 9:03:42',3,'B4' union all
select 'A',4,'2020/8/28 9:04:55',3,'A4' union all
select 'B',1,'2020/8/28 9:06:09',3,'B1' union all
select 'A',3,'2020/8/28 9:07:23',3,'A3' union all
select 'A',4,'2020/8/28 9:08:37',3,'A4' union all
select 'B',1,'2020/8/28 9:09:51',2,'B1' union all
select 'B',2,'2020/8/28 9:11:05',2,'B2' union all
select 'B',4,'2020/8/28 9:12:18',1,'B4' union all
select 'A',1,'2020/8/28 9:13:32',2,'A1' union all
select 'A',1,'2020/8/28 9:13:52',2,'A1' union all
select 'A',1,'2020/8/28 9:14:46',1,'A1' union all
select 'A',4,'2020/8/28 9:16:00',1,'A4' union all
select 'B',3,'2020/8/28 9:17:14',3,'B3' union all
select 'B',2,'2020/8/28 9:18:28',3,'B2' union all
select 'A',2,'2020/8/28 9:19:42',3,'A2' union all
select 'A',1,'2020/8/28 9:20:55',1,'A1' union all
select 'B',3,'2020/8/28 9:22:09',2,'B3' union all
select 'B',3,'2020/8/28 9:23:23',3,'B3' union all
select 'A',5,'2020/8/28 9:24:37',2,'A5' union all
select 'B',1,'2020/8/28 9:25:51',3,'B1' union all
select 'B',2,'2020/8/28 9:27:05',1,'B2' union all
select 'A',3,'2020/8/28 9:28:18',1,'A3' union all
select 'B',4,'2020/8/28 9:29:32',1,'B4' union all
select 'A',1,'2020/8/28 9:30:46',3,'A1' union all
select 'B',1,'2020/8/28 9:31:00',1,'B1' union all
select 'B',1,'2020/8/28 9:32:00',1,'B1' union all
select 'A',4,'2020/8/28 9:33:14',2,'A4' union all
select 'B',1,'2020/8/28 9:34:28',1,'B1' union all
select 'B',5,'2020/8/28 9:35:42',2,'B5' union all
select 'A',1,'2020/8/28 9:36:55',1,'A1' union all
select 'A',1,'2020/8/28 9:37:50',1,'A1' union all
select 'B',1,'2020/8/28 9:38:09',3,'B1' union all
select 'A',1,'2020/8/28 9:39:23',3,'A1' union all
select 'B',2,'2020/8/28 9:40:37',3,'B2' union all
select 'A',3,'2020/8/28 9:41:51',3,'A3' union all
select 'A',1,'2020/8/28 9:43:05',2,'A1' union all
select 'B',3,'2020/8/28 9:44:18',3,'B3' union all
select 'A',5,'2020/8/28 9:45:32',2,'A5' union all
select 'B',5,'2020/8/28 9:46:46',3,'B5' ;
-- 数据说明
-- bktab记录了 AB球队得分流水表
-- 每次球员进球后,就会插入一条记录,记录信息为(球队名称team,球员号码number,球员姓名name,,得分分数score 以及得分时间scoretime)
--分析思路1(获取前置元素 累计求和分组法)
-- 1.按team分组,按score_time 正向排序
-- 2.获取当前行的前一行 name
-- 3.判断当前行的name 是否与前一行的name是否相同,添加标记 不同为1 相同为0
-- 4.对标记累计求和,作为连续得分分组
-- 5.对分区计数,得出 连续得分次数
-- 执行sql
select
name
,cont_group
,count(name) as contin_cnt
from (
select
team
,number
,score_time
,score
,name
,pre_name
,if_contin
-- 累计求和,获取连续分组
,sum(if_contin) over (partition by team order by score_time asc) as cont_group
from (
select
team
,number
,score_time
,score
,name
-- 获取当前行的前一行的 name
,lag(name) over (partition by team order by score_time asc) as pre_name
-- 判断 pre_name 和name 是否相同
,if(lag(name) over (partition by team order by score_time asc) = name
,0,1
) as if_contin
from bktab
) t1
) t2
group by name
,cont_group
-- 通过这里限制 连续得分次数
having count(name) >= 3
;
-- 查询结果
name cont_group contin_cnt
A1 6 3
A1 14 3
B1 11 3
Time taken: 38.164 seconds, Fetched: 3 row(s)
-- 1.判断当前行连续特征
当前 前1 前2 后1 后2
A1 null null A1 A1
A1 A1 null A1 null
A1 A1 A1 null null
当前 = 后1 and 当前 = 后2 | 当前 = 前1 and 当前 = 后1 | 当前 = 前1 and 当前 = 前1
-- 执行sql
select distinct a.name ,a.team from
(
select *,lead(name,1) over(partition by team order by score_time) as ld1
,lead(name,2) over(partition by team order by score_time) as ld2
,lag(name,1) over(partition by team order by score_time) as lg1
,lag(name,2) over(partition by team order by score_time) as lg2
from bktab
) a
where (a.name =a.ld1 and a.name =a.ld2)
or (a.name =a.ld1 and a.name =a.lg1)
or (a.name=a.lg1 and a.name=a.lg2);
-- 查询结果
a.name a.team
A1 A
B1 B
Time taken: 23.196 seconds, Fetched: 2 row(s)
--分析思路
1.按score_time 对全局排序
2.获取当前行 A队累计得分 B队累计得分
3.获取 当前 A队累计得分 与 B队累计得分的差值
4.当前行差值 与上一行差值,发生符合变化时,表示 分数发生了反超
-- 查询sql
select
team
,number
,score_time
,score
,name
,ateam_score
,bteam_score
from (
select
team
,number
,score_time
,score
,name
,ateam_score
,bteam_score
,diff_score
,lag(diff_score) over (order by score_time asc) as pre_diff_score
,case when diff_score > 0 and lag(diff_score) over (order by score_time asc) < 0 then 1
when diff_score < 0 and lag(diff_score) over (order by score_time asc) > 0 then 1
when diff_score is not null and lag(diff_score) over (order by score_time asc) is null then 1
else 0
end as if_surpass
from (
select
team
,number
,score_time
,score
,name
,sum(if(team = 'A',score,0)) over (order by score_time asc) as ateam_score
,sum(if(team = 'B',score,0)) over (order by score_time asc) as bteam_score
,sum(if(team = 'A',score,0)) over (order by score_time asc) - sum(if(team = 'B',score,0)) over (order by score_time asc) as diff_score
from bktab
) t1
) t2
where if_surpass = 1
;
-- 执行结果
team number score_time score name ateam_score bteam_score
A 1 2020/8/28 9:01:14 1 A1 1 0
B 4 2020/8/28 9:03:42 3 B4 2 3
A 4 2020/8/28 9:04:55 3 A4 5 3
B 1 2020/8/28 9:06:09 3 B1 5 6
A 3 2020/8/28 9:07:23 3 A3 8 6
B 3 2020/8/28 9:23:23 3 B3 21 22
A 5 2020/8/28 9:24:37 2 A5 23 22
B 1 2020/8/28 9:25:51 3 B1 23 25
A 1 2020/8/28 9:43:05 2 A1 39 38
B 3 2020/8/28 9:44:18 3 B3 39 41