Hive SQL必刷练习题:同时在线人数问题(*****)
https://blog.csdn.net/Mikkkee/article/details/136776193
--Drop
DROP TABLE IF EXISTS test_live_events;
--DDL
CREATE TABLE IF NOT EXISTS test_live_events
(
user_id INT COMMENT '用户id',
live_id INT COMMENT '直播id',
in_datetime STRING COMMENT '进入直播间时间',
out_datetime STRING COMMENT '离开直播间时间'
) COMMENT '直播间访问记录' STORED AS ORC TBLPROPERTIES ( "orc.compress" = "SNAPPY", "discover.partitions" = "false" );
--Insert items
INSERT OVERWRITE TABLE test_live_events VALUES
(1010, 1, '2023-08-12 19:00:00', '2'),
(100, 1, '2023-08-12 19:30:00', '2023-08-12 19:53:00'),
(100, 2, '2023-08-12 21:01:00', '2023-08-12 22:00:00'),
(101, 1, '2023-08-12 19:05:00', '2023-08-12 20:55:00'),
(101, 2, '2023-08-12 21:05:00', '2023-08-12 21:58:00'),
(102, 1, '2023-08-12 21:05:00', '2023-08-12 22:05:00'),
(102, 2, '2023-08-12 19:00:00', '2023-08-12 20:59:00'),
(102, 3, '2023-08-12 21:05:00', '2023-08-12 22:05:00'),
(104, 1, '2023-08-12 19:00:00', '2023-08-12 20:59:00'),
(104, 2, '2023-08-12 21:57:00', '2023-08-12 22:56:00'),
(105, 2, '2023-08-12 19:10:00', '2023-08-12 19:18:00'),
(106, 3, '2023-08-12 19:01:00', '2023-08-12 21:10:00');
WITH tmp_live_status AS (
create table tmp as
SELECT
user_id,
live_id,
in_datetime AS event_time,
1 AS status
FROM
test_live_events
UNION ALL
SELECT
user_id,
live_id,
out_datetime AS event_time,
-1 AS status
FROM
test_live_events
)
--创建临时表 ,登入的时间 后面加个自动 为1 ,登出的时间 数据 加个自动 固定值为-1 两个数据结果集union 起来
SELECT
live_id,
MAX(online_cnt) AS online_cnt
FROM (
SELECT
user_id,
live_id,
event_time,
status,
SUM(status) OVER (PARTITION BY live_id ORDER BY event_time) AS online_cnt
-- 基于 迁入\出 时间进行排序, 基于直播间id进行分组. sum 迁入迁出的度量值 . 最大的那个就是某个时间点的 直播间
最多人数
FROM
tmp
) a
GROUP BY
live_id
;

浙公网安备 33010602011771号