3月16日

-- 1. 原始日志清洗
CREATE TABLE cleaned_logs AS
SELECT user_id, action, duration
FROM raw_logs
WHERE duration > 0;

-- 2. 按用户分区分桶存储
CREATE TABLE user_stats
PARTITIONED BY (dt STRING)
CLUSTERED BY (user_id) INTO 32 BUCKETS
AS
SELECT user_id, AVG(duration) AS avg_duration
FROM cleaned_logs
GROUP BY user_id;

-- 3. 输出报表
INSERT OVERWRITE LOCAL DIRECTORY '/report'
SELECT dt, user_id, avg_duration
FROM user_stats
ORDER BY avg_duration DESC;

 

posted @ 2025-03-17 07:59  KuanDong24  阅读(11)  评论(0)    收藏  举报