MySQL高级SQL系统梳理指南
数据准备
测试表结构及数据
-- 短视频平台数据分析 - 系统化窗口函数案例
-- 场景:类似抖音/快手的短视频平台用户行为分析
CREATE database if not exists ds DEFAULT CHARACTER SET utf8mb4 COLLATE utf8mb4_bin ;
use ds;
DROP TABLE IF EXISTS videos;
-- 视频发布表
CREATE TABLE videos (
video_id INT PRIMARY KEY,
creator_id INT,
creator_name VARCHAR(50),
category VARCHAR(20),
publish_time DATETIME,
duration INT, -- 视频时长(秒)
views INT, -- 播放量
likes INT, -- 点赞数
comments INT, -- 评论数
shares INT -- 分享数
);
-- 插入测试数据
INSERT INTO ds.videos (video_id, creator_id, creator_name, category, publish_time, duration, views, likes, comments, shares) VALUES
(1, 123, 'Alice', 'Gaming', '2023-05-15 14:23:45', 480, 25000, 1200, 85, 45),
(2, 456, 'Bob', 'Music', '2023-08-22 09:15:30', 240, 180000, 8500, 420, 380),
(3, 789, 'Charlie', 'Comedy', '2023-03-10 20:45:12', 360, 45000, 2800, 150, 95),
(4, 234, 'David', 'Education', '2023-11-05 16:30:20', 720, 95000, 4200, 380, 220),
(5, 567, 'Eva', 'Technology', '2023-07-18 11:20:35', 420, 320000, 15600, 850, 1200),
(6, 890, 'Frank', 'Sports', '2023-09-03 19:45:50', 300, 78000, 3900, 280, 185),
(7, 345, 'Grace', 'Food', '2023-04-28 13:15:25', 600, 125000, 7200, 450, 380),
(8, 678, 'Henry', 'Travel', '2023-12-12 08:30:40', 900, 220000, 11500, 680, 950),
(9, 901, 'Ivy', 'Beauty', '2023-06-07 15:45:15', 180, 85000, 6800, 320, 280),
(10, 112, 'Jack', 'Fashion', '2023-01-25 12:20:30', 240, 55000, 4200, 180, 125),
(11, 334, 'Kate', 'News', '2023-10-14 17:30:45', 480, 380000, 12000, 1200, 850),
(12, 556, 'Leo', 'Science', '2023-02-28 10:45:20', 840, 450000, 28500, 1850, 2200),
(13, 778, 'Mary', 'Health', '2023-08-16 21:15:35', 360, 95000, 5800, 420, 380),
(14, 223, 'Nick', 'Finance', '2023-05-30 14:30:50', 720, 180000, 8900, 680, 520),
(15, 445, 'Olivia', 'Politics', '2023-09-22 16:45:25', 600, 280000, 15200, 1350, 980),
(16, 667, 'Peter', 'Entertainment', '2023-03-18 18:20:40', 420, 125000, 9800, 580, 420),
(17, 889, 'Queen', 'Documentary', '2023-11-08 13:15:55', 1800, 850000, 45000, 3200, 4500),
(18, 111, 'Rose', 'Animation', '2023-07-04 09:30:15', 480, 320000, 18500, 980, 1200),
(19, 333, 'Sam', 'Tutorial', '2023-12-20 15:45:30', 960, 180000, 12800, 850, 680),
(20, 555, 'Tom', 'Review', '2023-04-12 11:20:45', 720, 95000, 7200, 420, 280),
(21, 777, '张三', 'Gaming', '2023-06-25 20:15:20', 360, 68000, 4200, 280, 185),
(22, 999, '李四', 'Music', '2023-01-15 14:30:35', 240, 125000, 8500, 520, 380),
(23, 121, '王五', 'Comedy', '2023-09-08 17:45:50', 300, 45000, 3200, 150, 95),
(24, 343, '赵六', 'Education', '2023-05-22 12:15:25', 600, 180000, 9800, 680, 520),
(25, 565, '孙七', 'Technology', '2023-11-18 19:30:40', 480, 380000, 22500, 1200, 1850),
(26, 787, '周八', 'Sports', '2023-03-28 16:45:15', 420, 95000, 5800, 380, 280),
(27, 909, '吴九', 'Food', '2023-08-14 21:20:30', 720, 220000, 15600, 850, 1200),
(28, 131, '郑十', 'Travel', '2023-12-05 08:15:45', 900, 125000, 8900, 520, 420),
(29, 353, '冯十一', 'Beauty', '2023-07-20 13:30:20', 180, 85000, 7200, 420, 320),
(30, 575, '陈十二', 'Fashion', '2023-02-14 10:45:35', 360, 68000, 4800, 280, 185),
(31, 797, 'Alice', 'Gaming', '2023-10-30 15:20:50', 540, 32000, 1850, 95, 68),
(32, 919, 'Bob', 'Music', '2023-04-18 18:15:25', 300, 155000, 9200, 580, 420),
(33, 141, 'Charlie', 'Comedy', '2023-12-28 11:30:40', 420, 58000, 3800, 185, 125),
(34, 363, 'David', 'Education', '2023-06-12 20:45:15', 780, 125000, 7800, 520, 380),
(35, 585, 'Eva', 'Technology', '2023-01-28 14:20:30', 360, 280000, 18500, 980, 1350),
(36, 807, 'Frank', 'Sports', '2023-09-15 17:15:45', 480, 85000, 5200, 320, 220),
(37, 929, 'Grace', 'Food', '2023-05-08 12:30:20', 600, 180000, 12800, 680, 850),
(38, 151, 'Henry', 'Travel', '2023-11-22 19:45:35', 1200, 320000, 19800, 1200, 1850),
(39, 373, 'Ivy', 'Beauty', '2023-03-16 16:20:50', 240, 95000, 7800, 420, 380),
(40, 595, 'Jack', 'Fashion', '2023-08-05 21:15:25', 300, 68000, 4800, 280, 185),
(41, 817, 'Kate', 'News', '2023-12-18 09:30:40', 480, 450000, 22500, 1850, 2200),
(42, 939, 'Leo', 'Science', '2023-07-10 13:45:15', 960, 580000, 35000, 2800, 3800),
(43, 161, 'Mary', 'Health', '2023-02-25 18:20:30', 420, 125000, 8900, 520, 420),
(44, 383, 'Nick', 'Finance', '2023-10-12 15:15:45', 600, 220000, 15600, 980, 850),
(45, 605, 'Olivia', 'Politics', '2023-04-30 11:30:20', 780, 380000, 25200, 1850, 1950),
(46, 827, 'Peter', 'Entertainment', '2023-09-18 20:45:35', 360, 155000, 12500, 680, 520),
(47, 949, 'Queen', 'Documentary', '2023-06-28 14:20:50', 2400, 950000, 58000, 4200, 5800),
(48, 171, 'Rose', 'Animation', '2023-01-12 17:15:25', 420, 280000, 18800, 1200, 1350),
(49, 393, 'Sam', 'Tutorial', '2023-11-05 12:30:40', 840, 185000, 14500, 850, 680),
(50, 615, 'Tom', 'Review', '2023-08-22 19:45:15', 600, 125000, 9200, 580, 420),
(51, 123, '张三', 'Gaming', '2023-03-15 10:20:30', 480, 85000, 5800, 380, 280),
(52, 456, '李四', 'Music', '2023-12-08 16:15:45', 320, 155000, 11200, 680, 520),
(53, 789, '王五', 'Comedy', '2023-07-25 21:30:20', 240, 58000, 4200, 220, 125),
(54, 234, '赵六', 'Education', '2023-02-18 13:45:35', 720, 220000, 15600, 980, 850),
(55, 567, '孙七', 'Technology', '2023-10-28 18:20:50', 540, 450000, 32500, 2200, 2800),
(56, 890, '周八', 'Sports', '2023-05-15 15:15:25', 360, 125000, 8500, 520, 380),
(57, 345, '吴九', 'Food', '2023-11-30 11:30:40', 600, 280000, 18800, 1200, 1500),
(58, 678, '郑十', 'Travel', '2023-08-18 20:45:15', 960, 185000, 12800, 850, 680),
(59, 901, '冯十一', 'Beauty', '2023-04-05 14:20:30', 180, 95000, 8200, 480, 380),
(60, 112, '陈十二', 'Fashion', '2023-12-22 17:15:45', 300, 78000, 5800, 320, 220),
(61, 334, 'Alice', 'Gaming', '2023-06-10 12:30:20', 420, 45000, 2800, 150, 95),
(62, 556, 'Bob', 'Music', '2023-01-25 19:45:35', 360, 220000, 15600, 850, 1200),
(63, 778, 'Charlie', 'Comedy', '2023-09-12 16:20:50', 300, 68000, 4800, 280, 185),
(64, 223, 'David', 'Education', '2023-05-28 21:15:25', 840, 155000, 9800, 680, 520),
(65, 445, 'Eva', 'Technology', '2023-12-15 08:30:40', 480, 380000, 28500, 1850, 2500),
(66, 667, 'Frank', 'Sports', '2023-03-22 13:45:15', 420, 95000, 6200, 380, 280),
(67, 889, 'Grace', 'Food', '2023-11-08 18:20:30', 720, 250000, 18200, 1200, 1350),
(68, 111, 'Henry', 'Travel', '2023-07-18 15:15:45', 1080, 185000, 13500, 850, 680),
(69, 333, 'Ivy', 'Beauty', '2023-02-28 11:30:20', 240, 125000, 9800, 580, 420),
(70, 555, 'Jack', 'Fashion', '2023-10-15 20:45:35', 360, 85000, 6500, 380, 280),
(71, 777, 'Kate', 'News', '2023-04-22 14:20:50', 600, 520000, 35000, 2800, 3200),
(72, 999, 'Leo', 'Science', '2023-12-05 17:15:25', 900, 680000, 45000, 3800, 4500),
(73, 121, 'Mary', 'Health', '2023-08-28 12:30:40', 480, 155000, 12500, 680, 520),
(74, 343, 'Nick', 'Finance', '2023-06-15 19:45:15', 720, 280000, 19800, 1350, 1200),
(75, 565, 'Olivia', 'Politics', '2023-01-30 16:20:30', 600, 450000, 32500, 2500, 2800),
(76, 787, 'Peter', 'Entertainment', '2023-09-25 21:15:45', 420, 185000, 15200, 850, 680),
(77, 909, 'Queen', 'Documentary', '2023-05-12 09:30:20', 3600, 1200000, 85000, 6500, 8200),
(78, 131, 'Rose', 'Animation', '2023-12-28 13:45:35', 540, 350000, 25200, 1650, 1950),
(79, 353, 'Sam', 'Tutorial', '2023-03-18 18:20:50', 780, 220000, 16800, 1200, 980),
(80, 575, 'Tom', 'Review', '2023-11-02 15:15:25', 480, 155000, 11500, 680, 520),
(81, 797, '张三', 'Gaming', '2023-07-20 11:30:40', 360, 95000, 6800, 420, 320),
(82, 919, '李四', 'Music', '2023-02-15 20:45:15', 240, 185000, 13800, 850, 680),
(83, 141, '王五', 'Comedy', '2023-10-08 14:20:30', 300, 68000, 4800, 280, 185),
(84, 363, '赵六', 'Education', '2023-04-25 17:15:45', 600, 280000, 19800, 1350, 1200),
(85, 585, '孙七', 'Technology', '2023-12-18 12:30:20', 420, 520000, 38500, 2800, 3500),
(86, 807, '周八', 'Sports', '2023-08-05 19:45:35', 480, 125000, 8900, 580, 420),
(87, 929, '吴九', 'Food', '2023-06-22 16:20:50', 720, 320000, 22500, 1500, 1850),
(88, 151, '郑十', 'Travel', '2023-01-18 21:15:25', 840, 220000, 15600, 980, 850),
(89, 373, '冯十一', 'Beauty', '2023-09-30 08:30:40', 180, 115000, 9500, 520, 420),
(90, 595, '陈十二', 'Fashion', '2023-05-15 13:45:15', 320, 85000, 6200, 380, 280),
(91, 817, 'Alice', 'Gaming', '2023-12-08 18:20:30', 540, 58000, 3800, 220, 155),
(92, 939, 'Bob', 'Music', '2023-03-25 15:15:45', 280, 280000, 19800, 1200, 1350),
(93, 161, 'Charlie', 'Comedy', '2023-11-12 11:30:20', 360, 78000, 5500, 320, 220),
(94, 383, 'David', 'Education', '2023-07-28 20:45:35', 720, 185000, 12800, 850, 680),
(95, 605, 'Eva', 'Technology', '2023-02-12 14:20:50', 480, 450000, 32500, 2200, 2800),
(96, 827, 'Frank', 'Sports', '2023-10-30 17:15:25', 420, 115000, 8200, 480, 350),
(97, 949, 'Grace', 'Food', '2023-06-18 12:30:40', 600, 380000, 28500, 1850, 2200),
(98, 171, 'Henry', 'Travel', '2023-01-05 19:45:15', 1200, 250000, 18200, 1200, 1350),
(99, 393, 'Ivy', 'Beauty', '2023-09-22 16:20:30', 240, 145000, 11500, 680, 520),
(100, 615, 'Jack', 'Fashion', '2023-05-08 21:15:45', 360, 95000, 7200, 420, 320);
1. 窗口函数 (Window Functions)
基本语法
SELECT column_name,
window_function() OVER (
[PARTITION BY partition_expression]
[ORDER BY sort_expression]
[ROWS/RANGE window_frame]
)
FROM table_name;
常用窗口函数
排名函数
- ROW_NUMBER: 分页、去重
- RANK/DENSE_RANK: 排行榜
- PERCENT_RANK: 性能评估
ROW_NUMBER - 唯一行号,常用于分页和去重
-- ROW_NUMBER 唯一行号,常用于分页和去重
SELECT
ROW_NUMBER() OVER(ORDER BY views desc) as row_id,s.*,
CASE
WHEN ROW_NUMBER() OVER (ORDER BY views DESC) <= 5 THEN '热门推荐'
WHEN ROW_NUMBER() OVER (ORDER BY views DESC) <=8 THEN '中度推荐'
WHEN ROW_NUMBER() OVER (ORDER BY views DESC) <=10 THEN '轻度推荐'
END as rec_tag
FROM ds.videos s;
RANK vs DENSE_RANK - 处理并列排名的两种策略
SELECT creator_name, likes,
RANK() OVER (ORDER BY likes DESC) as rank_with_gaps, -- 跳跃式排名:1,2,2,4
DENSE_RANK() OVER (ORDER BY likes DESC) as rank_no_gaps -- 连续式排名:1,2,2,3
FROM videos
ORDER BY likes DESC;
PERCENT_RANK - 百分位排名,用于性能评估
SELECT video_id, creator_name, views,
percent_rank() OVER (ORDER BY views) as percentile
FROM ds.videos s;
SELECT video_id, creator_name, views,
PERCENT_RANK() OVER (ORDER BY views) as percentile,
CASE
WHEN PERCENT_RANK() OVER (ORDER BY views) >= 0.8 THEN 'Top 20%'
WHEN PERCENT_RANK() OVER (ORDER BY views) >= 0.5 THEN 'Top 50%'
ELSE 'Below Average'
END as performance_tier
FROM videos;
取值函数
- LAG/LEAD: 环比增长率
- FIRST/LAST_VALUE: 基准对比
- NTH_VALUE: 特定位置取值
FIRST_VALUE/LAST_VALUE - 窗口内首尾值
SELECT creator_name, video_id, publish_time, views,
FIRST_VALUE(views) OVER (PARTITION BY creator_name ORDER BY publish_time) as first_video_views,
LAST_VALUE(views) OVER (PARTITION BY creator_name ORDER BY publish_time
ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) as latest_video_views -- ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING 定义窗口函数的计算范围为整个分区,从第一行到最后一行。
FROM videos
ORDER BY creator_name, publish_time;
LAG/LEAD - 时间序列分析核心
SELECT creator_name, video_id, publish_time, views,
LAG(views,1) OVER(PARTITION BY creator_name ORDER BY publish_time) as prev_views
FROM videos
ORDER BY creator_name, publish_time;
SELECT creator_name, video_id, publish_time, views,
LEAD(views,1) OVER(PARTITION BY creator_name ORDER BY publish_time) as next_views
FROM videos
ORDER BY creator_name, publish_time;
增长率计算
SELECT creator_name, video_id, publish_time, views,
(views - LAG(views,1) OVER(PARTITION BY creator_name ORDER BY publish_time)) * 100.0 / LAG(views,1) OVER(PARTITION BY creator_name ORDER BY publish_time) as growth_rate_pct
FROM videos
ORDER BY creator_name, publish_time;
增长率计算(保留2位小数)
SELECT creator_name, video_id, publish_time, views,
ROUND(
(views - LAG(views, 1) OVER (PARTITION BY creator_name ORDER BY publish_time)) * 100.0 /
LAG(views, 1) OVER (PARTITION BY creator_name ORDER BY publish_time), 2
) as growth_rate_pct
FROM videos
ORDER BY creator_name, publish_time;
NTH_VALUE - 取指定位置的值
SELECT creator_name, views,
NTH_VALUE(views, 2) OVER (PARTITION BY creator_name ORDER BY views DESC
ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) as second_best_views -- ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING 定义窗口函数的计算范围为整个分区,从第一行到最后一行。
FROM videos;
聚合函数
- SUM() OVER: 累计求和、移动平均
- AVG() OVER: 个体与群体比较
移动窗口聚合 - 趋势分析
SELECT
DATE(publish_time) as date,
SUM(views) as daily_views,
-- 累计播放量
SUM(SUM(views)) OVER (ORDER BY DATE(publish_time)) as cumulative_views
FROM videos
GROUP BY DATE(publish_time)
ORDER BY date;
SELECT
DATE(publish_time) as date,
SUM(views) as daily_views,
-- 3日移动平均
AVG(SUM(views)) OVER (ORDER BY DATE(publish_time)
ROWS BETWEEN 2 PRECEDING AND CURRENT ROW) as ma3_views, -- 计算按日期排序的浏览量总和的三行移动平均值。
-- 累计播放量
SUM(SUM(views)) OVER (ORDER BY DATE(publish_time)) as cumulative_views
FROM videos
GROUP BY DATE(publish_time)
ORDER BY date;
分组聚合对比 - 个体与群体比较
SELECT creator_name, category, views,
AVG(views) OVER (PARTITION BY category) as category_avg_views,
views - AVG(views) OVER (PARTITION BY category) as vs_category_avg
FROM videos;
SELECT creator_name, category, views,
SUM(views) OVER (PARTITION BY creator_name) as creator_total_views,
views * 100.0 / SUM(views) OVER (PARTITION BY creator_name) AS contribution_pct, -- 创作者贡献度
ROUND(views * 100.0 / SUM(views) OVER (PARTITION BY creator_name),2) AS contribution_pct -- 创作者贡献度(保留2位小数)
FROM videos;
分布函数(例子不明显待完善)
- NTILE: 用户分层、数据分桶
- CUME_DIST: 异常检测
SELECT creator_name, views, likes,
NTILE(3) OVER (ORDER BY views) as views_tier,
NTILE(4) OVER (ORDER BY likes) as likes_quartile,
-- 组合分层策略
CASE
WHEN NTILE(3) OVER (ORDER BY views) = 1 THEN '播放量-低'
WHEN NTILE(3) OVER (ORDER BY views) = 2 THEN '播放量-中'
ELSE '播放量-高'
END as views_level
FROM videos;
SELECT video_id, views,
CUME_DIST() OVER (ORDER BY views) as cumulative_dist,
CASE
WHEN CUME_DIST() OVER (ORDER BY views) > 0.95 THEN '异常高播放量'
WHEN CUME_DIST() OVER (ORDER BY views) < 0.05 THEN '异常低播放量'
ELSE '正常范围'
END as anomaly_flag
FROM videos;
应用场景
- 内容创作者画像分析
WITH creator_stats AS (
SELECT
creator_name,
COUNT(*) as video_count,
AVG(views) as avg_views,
AVG(likes) as avg_likes,
SUM(views) as total_views,
AVG(duration) as avg_duration
FROM videos
GROUP BY creator_name
)
SELECT
creator_name,
video_count,
avg_views,
-- 创作者分层
NTILE(3) OVER (ORDER BY total_views DESC) as creator_tier,
-- 产出效率排名
RANK() OVER (ORDER BY avg_views DESC) as efficiency_rank,
-- 与平台平均的比较
ROUND(avg_views / AVG(avg_views) OVER (), 2) as vs_platform_avg
FROM creator_stats
ORDER BY total_views DESC;
- 病毒式传播分析 - 找出爆款视频规律
SELECT
video_id,
creator_name,
category,
views,
likes,
-- 互动率计算
ROUND(likes * 100.0 / NULLIF(views, 0), 2) as engagement_rate,
-- 爆款识别
CASE
WHEN views > AVG(views) OVER () * 2 THEN '超级爆款'
WHEN views > AVG(views) OVER () * 1.5 THEN '准爆款'
ELSE '普通视频'
END as viral_status,
-- 类别内排名
RANK() OVER (PARTITION BY category ORDER BY views DESC) as category_rank
FROM videos
ORDER BY views DESC;
- 用户留存分析 - 模拟连续发布创作者
SELECT
creator_name,
video_id,
publish_time,
-- 发布间隔天数
DATEDIFF(
publish_time,
LAG(publish_time) OVER (PARTITION BY creator_name ORDER BY publish_time)
) as days_since_last_post,
-- 连续发布标识
CASE
WHEN DATEDIFF(publish_time, LAG(publish_time) OVER (PARTITION BY creator_name ORDER BY publish_time)) <= 1
THEN '连续创作者'
ELSE '非连续创作者'
END as creator_consistency
FROM videos
ORDER BY creator_name, publish_time;
2. 公用表表达式 (CTE)
基于MySQL 8.x
基础CTE - 替代子查询
- 简单CTE - 找出高于平均播放量的视频
WITH avg_stats AS (
SELECT AVG(views) as avg_views, AVG(likes) as avg_likes
FROM videos
)
SELECT v.video_id, v.creator_name, v.views, v.likes,
ROUND(v.views / a.avg_views, 2) as views_ratio,
ROUND(v.likes / a.avg_likes, 2) as likes_ratio
FROM videos v, avg_stats a
WHERE v.views > a.avg_views;
- 多个CTE - 创作者分层分析
WITH creator_stats AS (
SELECT creator_name,
COUNT(*) as video_count,
SUM(views) as total_views,
AVG(views) as avg_views
FROM videos
GROUP BY creator_name
),
platform_benchmarks AS (
SELECT AVG(video_count) as avg_video_count,
AVG(total_views) as avg_total_views
FROM creator_stats
)
SELECT cs.creator_name,
cs.video_count,
cs.total_views,
-- 与平台基准比较
CASE
WHEN cs.total_views > pb.avg_total_views THEN '头部创作者'
WHEN cs.video_count > pb.avg_video_count THEN '活跃创作者'
ELSE '普通创作者'
END as creator_tier
FROM creator_stats cs, platform_benchmarks pb
ORDER BY cs.total_views DESC;
递归CTE - 处理层次数据
病毒传播模拟 - 递归计算传播层级
WITH RECURSIVE viral_spread AS (
-- 初始种子视频(播放量>80000的爆款)
SELECT video_id, creator_name, views, 1 as spread_level
FROM videos
WHERE views > 80000
UNION ALL
-- 模拟传播:每层传播影响相同创作者的其他视频
SELECT v.video_id, v.creator_name, v.views, vs.spread_level + 1
FROM videos v
INNER JOIN viral_spread vs ON v.creator_name = vs.creator_name
WHERE vs.spread_level < 3 AND v.video_id != vs.video_id
)
SELECT DISTINCT video_id, creator_name, views, spread_level,
CASE spread_level
WHEN 1 THEN '源头爆款'
WHEN 2 THEN '一度传播'
WHEN 3 THEN '二度传播'
END as spread_type
FROM viral_spread
ORDER BY spread_level, views DESC;
窗口函数 + CTE 组合
- 创作者成长轨迹分析
WITH creator_timeline AS (
SELECT creator_name, video_id, publish_time, views,
ROW_NUMBER() OVER (PARTITION BY creator_name ORDER BY publish_time) as video_seq,
LAG(views) OVER (PARTITION BY creator_name ORDER BY publish_time) as prev_views
FROM videos
),
growth_analysis AS (
SELECT *,
CASE
WHEN prev_views IS NULL THEN '首发视频'
WHEN views > prev_views * 1.5 THEN '大幅增长'
WHEN views > prev_views THEN '稳步增长'
ELSE '播放下滑'
END as growth_trend
FROM creator_timeline
)
SELECT creator_name,
COUNT(*) as total_videos,
SUM(CASE WHEN growth_trend = '大幅增长' THEN 1 ELSE 0 END) as big_growth_count,
SUM(CASE WHEN growth_trend = '稳步增长' THEN 1 ELSE 0 END) as steady_growth_count,
ROUND(
SUM(CASE WHEN growth_trend IN ('大幅增长', '稳步增长') THEN 1 ELSE 0 END) * 100.0 / COUNT(*), 2
) as growth_rate_pct
FROM growth_analysis
WHERE video_seq > 1 -- 排除首发视频
GROUP BY creator_name
ORDER BY growth_rate_pct DESC;
- 热门内容挖掘 - 多维度分析
WITH category_performance AS (
SELECT category,
COUNT(*) as video_count,
AVG(views) as avg_views,
AVG(likes) as avg_likes,
RANK() OVER (ORDER BY AVG(views) DESC) as category_rank
FROM videos
GROUP BY category
),
top_videos AS (
SELECT video_id, creator_name, category, views, likes,
RANK() OVER (PARTITION BY category ORDER BY views DESC) as rank_in_category
FROM videos
)
SELECT cp.category,
cp.avg_views,
cp.category_rank,
tv.creator_name as top_creator,
tv.views as top_video_views,
ROUND(tv.views / cp.avg_views, 2) as vs_category_avg
FROM category_performance cp
INNER JOIN top_videos tv ON cp.category = tv.category
WHERE tv.rank_in_category = 1
ORDER BY cp.category_rank;
复杂业务场景 - CTE最佳实践
- 漏斗分析 - 用户行为转化
WITH video_metrics AS (
SELECT video_id, creator_name, views, likes, comments, shares,
-- 计算转化率
ROUND(likes * 100.0 / NULLIF(views, 0), 2) as like_rate,
ROUND(comments * 100.0 / NULLIF(views, 0), 2) as comment_rate,
ROUND(shares * 100.0 / NULLIF(views, 0), 2) as share_rate
FROM videos
),
conversion_tiers AS (
SELECT *,
CASE
WHEN like_rate >= 5 THEN '高互动'
WHEN like_rate >= 2 THEN '中互动'
ELSE '低互动'
END as engagement_tier
FROM video_metrics
)
SELECT engagement_tier,
COUNT(*) as video_count,
AVG(views) as avg_views,
AVG(like_rate) as avg_like_rate,
AVG(comment_rate) as avg_comment_rate,
AVG(share_rate) as avg_share_rate
FROM conversion_tiers
GROUP BY engagement_tier
ORDER BY AVG(like_rate) DESC;
- AB测试效果分析 - 模拟不同时长对播放量的影响
WITH duration_groups AS (
SELECT video_id, creator_name, duration, views,
CASE
WHEN duration <= 60 THEN '短视频(≤1分钟)'
WHEN duration <= 180 THEN '中视频(1-3分钟)'
ELSE '长视频(>3分钟)'
END as duration_group
FROM videos
),
group_stats AS (
SELECT duration_group,
COUNT(*) as sample_size,
AVG(views) as avg_views,
STDDEV(views) as stddev_views
FROM duration_groups
GROUP BY duration_group
)
SELECT duration_group,
sample_size,
ROUND(avg_views, 0) as avg_views,
ROUND(stddev_views, 0) as stddev_views,
-- 置信区间计算
ROUND(avg_views - 1.96 * stddev_views / SQRT(sample_size), 0) as ci_lower,
ROUND(avg_views + 1.96 * stddev_views / SQRT(sample_size), 0) as ci_upper
FROM group_stats
ORDER BY avg_views DESC;
- CTE vs 子查询 性能对比示例
- 传统子查询写法(不推荐)
SELECT creator_name, video_count, avg_views
FROM (
SELECT creator_name,
COUNT(*) as video_count,
AVG(views) as avg_views
FROM videos
GROUP BY creator_name
) creator_summary
WHERE video_count >= (
SELECT AVG(video_count)
FROM (
SELECT COUNT(*) as video_count
FROM videos
GROUP BY creator_name
) sub
);

-
- CTE优化写法(推荐)
WITH creator_summary AS (
SELECT creator_name,
COUNT(*) as video_count,
AVG(views) as avg_views
FROM videos
GROUP BY creator_name
),
benchmark AS (
SELECT AVG(video_count) as avg_video_count
FROM creator_summary
)
SELECT cs.creator_name, cs.video_count, cs.avg_views
FROM creator_summary cs, benchmark b
WHERE cs.video_count >= b.avg_video_count;
3. 高级子查询
- 标量子查询 - 返回单值,用于比较和计算(返回单个值,可用于SELECT/WHERE )
-- 在SELECT中使用标量子查询 - 与全局基准对比
SELECT video_id, creator_name, views,
(SELECT AVG(views) FROM videos) as platform_avg_views,
views - (SELECT AVG(views) FROM videos) as vs_avg_diff,
ROUND(views * 100.0 / (SELECT SUM(views) FROM videos), 2) as market_share_pct
FROM videos
ORDER BY views DESC;
-- 在WHERE中使用标量子查询 - 动态筛选条件
SELECT creator_name, video_id, views, likes
FROM videos
WHERE views > (SELECT AVG(views) FROM videos) -- 高于平均播放量
ORDER BY views DESC;
-- 相关标量子查询 - 每个创作者与自己历史对比
SELECT creator_name, video_id, views, publish_time,
(SELECT AVG(views) FROM videos v2 WHERE v2.creator_name = v1.creator_name) as personal_avg,
(SELECT MAX(views) FROM videos v2 WHERE v2.creator_name = v1.creator_name) as personal_best,
CASE
WHEN views = (SELECT MAX(views) FROM videos v2 WHERE v2.creator_name = v1.creator_name) THEN '个人最佳'
WHEN views > (SELECT AVG(views) FROM videos v2 WHERE v2.creator_name = v1.creator_name) THEN '超常发挥'
ELSE '低于平均'
END as performance_level
FROM videos v1
ORDER BY creator_name, views DESC;
- 行子查询 - 返回单行,用于行级匹配
-- 行子查询 - 查找最高播放量的视频完整信息
SELECT creator_name, video_id, views, likes, comments
FROM videos
WHERE (views, likes) = (SELECT MAX(views), MAX(likes) FROM videos);
-- 行子查询 - 多维度最优匹配
SELECT creator_name, category, views, likes,
views + likes as total_engagement
FROM videos
WHERE (views, likes) >= (SELECT AVG(views), AVG(likes) FROM videos) -- 双高视频
ORDER BY total_engagement DESC;
- 列子查询 - 返回单列,配合IN/ANY/ALL
-- IN子查询 - 查找头部创作者的所有视频
SELECT creator_name, video_id, views, category
FROM videos
WHERE creator_name IN (
SELECT creator_name
FROM videos
GROUP BY creator_name
HAVING SUM(views) > 100000 -- 总播放量超过10万的创作者
)
ORDER BY creator_name, views DESC;
-- ANY/SOME子查询 - 存在性检查
SELECT creator_name, video_id, views
FROM videos v1
WHERE views > ANY ( -- 播放量超过任意一个科技类视频
SELECT views FROM videos WHERE category = '科技'
)
AND category != '科技' -- 排除科技类本身
ORDER BY views DESC;
-- ALL子查询 - 全量比较
SELECT creator_name, video_id, views, category
FROM videos
WHERE views > ALL ( -- 播放量超过所有健身类视频
SELECT views FROM videos WHERE category = '健身'
)
ORDER BY views DESC;
- 表子查询 - 返回结果集,用于复杂分析
-- 表子查询 + JOIN - 创作者分层分析
SELECT base.creator_name, base.total_views, base.video_count,
tier.tier_name, tier.min_views, tier.max_views
FROM (
SELECT creator_name,
SUM(views) as total_views,
COUNT(*) as video_count
FROM videos
GROUP BY creator_name
) base
JOIN (
SELECT '头部创作者' as tier_name, 150000 as min_views, 999999999 as max_views
UNION ALL
SELECT '腰部创作者', 50000, 149999
UNION ALL
SELECT '尾部创作者', 0, 49999
) tier ON base.total_views BETWEEN tier.min_views AND tier.max_views
ORDER BY base.total_views DESC;
-- 复杂表子查询 - 类别竞争力分析
SELECT category_stats.category,
category_stats.avg_views,
category_stats.video_count,
category_stats.top_creator,
category_stats.competition_index
FROM (
SELECT category,
AVG(views) as avg_views,
COUNT(*) as video_count,
-- 找到该类别播放量最高的创作者
(SELECT creator_name FROM videos v2
WHERE v2.category = v1.category
ORDER BY views DESC LIMIT 1) as top_creator,
-- 计算竞争指数:标准差/平均值
ROUND(STDDEV(views) / AVG(views), 2) as competition_index
FROM videos v1
GROUP BY category
) category_stats
ORDER BY category_stats.avg_views DESC;
- EXISTS子查询 - 存在性判断
-- EXISTS - 找出有多个视频的活跃创作者
SELECT DISTINCT creator_name,
(SELECT COUNT(*) FROM videos v2 WHERE v2.creator_name = v1.creator_name) as video_count,
(SELECT MAX(views) FROM videos v2 WHERE v2.creator_name = v1.creator_name) as best_performance
FROM videos v1
WHERE EXISTS (
SELECT 1 FROM videos v2
WHERE v2.creator_name = v1.creator_name
AND v2.video_id != v1.video_id -- 存在其他视频
)
ORDER BY video_count DESC;
-- NOT EXISTS - 找出某类别的独家创作者
SELECT creator_name, category, views
FROM videos v1
WHERE NOT EXISTS (
SELECT 1 FROM videos v2
WHERE v2.creator_name = v1.creator_name
AND v2.category != v1.category -- 不存在其他类别的视频
)
ORDER BY category, views DESC;
-- 找出每个类别中超越该类别平均水平的视频
select a.category,v.creator_name,v.video_id,v.views, a.avs
from (
select category,avg(views) as avs
from ds.videos
group by category
) a, ds.videos v
where v.views > a.avs and v.category = a.category;
-- 子查询 + 窗口函数 - 动态基准比较
SELECT creator_name, video_id, views, category,
-- 动态计算:与同类别前3名的平均值比较
(SELECT AVG(views) FROM (
SELECT views FROM videos v2
WHERE v2.category = v1.category
ORDER BY views DESC
LIMIT 3
) top3) as category_top3_avg,
CASE
WHEN views >= (SELECT AVG(views) FROM (
SELECT views FROM videos v2
WHERE v2.category = v1.category
ORDER BY views DESC
LIMIT 3
) top3) THEN '顶级水平'
WHEN views >= (SELECT AVG(views) FROM videos v2 WHERE v2.category = v1.category) THEN '优秀水平'
ELSE '有待提升'
END as performance_rating
FROM videos v1
ORDER BY category, views DESC;
-- 复杂业务场景 - 潜力视频挖掘
SELECT v1.video_id, v1.creator_name, v1.views, v1.likes, v1.category,
growth_potential,
engagement_score
FROM videos v1
JOIN (
SELECT video_id,
-- 计算增长潜力:与同创作者历史最佳的差距
(SELECT MAX(views) FROM videos v2 WHERE v2.creator_name = v.creator_name) - views as growth_potential,
-- 计算互动分数:相对于同类别的表现
ROUND(
(likes * 1.0 / NULLIF(views, 0)) /
(SELECT AVG(likes * 1.0 / NULLIF(views, 0)) FROM videos v3 WHERE v3.category = v.category) * 100,
2
) as engagement_score
FROM videos v
) metrics ON v1.video_id = metrics.video_id
WHERE growth_potential > 0 -- 有增长空间
AND engagement_score > 100 -- 互动率高于同类别平均
ORDER BY growth_potential DESC, engagement_score DESC;
4. 高级聚合操作
窗口函数(Window Functions)
- 排名函数
-- 按播放量排名(相同排名会跳跃)
SELECT creator_name, views,
RANK() OVER (ORDER BY views DESC) as rank_views
FROM videos;
-- 按播放量密集排名(相同排名不跳跃)
SELECT creator_name, views,
DENSE_RANK() OVER (ORDER BY views DESC) as dense_rank_views
FROM videos;
-- 行号(连续编号)
SELECT creator_name, views,
ROW_NUMBER() OVER (ORDER BY views DESC) as row_num
FROM videos;
-- 分类别排名
SELECT creator_name, category, views,
RANK() OVER (PARTITION BY category ORDER BY views DESC) as category_rank
FROM videos;
- 聚合窗口函数
-- 累计播放量
SELECT creator_name, publish_time, views,
SUM(views) OVER (ORDER BY publish_time) as cumulative_views
FROM videos;
-- 移动平均(3天窗口)
SELECT creator_name, publish_time, views,
AVG(views) OVER (ORDER BY publish_time ROWS BETWEEN 2 PRECEDING AND CURRENT ROW) as moving_avg
FROM videos;
-- 分组累计
SELECT creator_name, category, views,
SUM(views) OVER (PARTITION BY category ORDER BY publish_time) as category_cumulative
FROM videos;
- 偏移函数
-- 获取上一条记录的播放量
SELECT creator_name, views,
LAG(views, 1) OVER (ORDER BY publish_time) as prev_views,
views - LAG(views, 1) OVER (ORDER BY publish_time) as views_diff
FROM videos;
-- 获取下一条记录的播放量
SELECT creator_name, views,
LEAD(views, 1) OVER (ORDER BY publish_time) as next_views
FROM videos;
分组统计与多维分析
ROLLUP - 层级汇总
-- 按创作者和分类进行层级汇总
SELECT creator_name, category,
SUM(views) as total_views,
COUNT(*) as video_count
FROM videos
GROUP BY creator_name, category WITH ROLLUP;
结果说明:
- NULL就是"加起来"的意思,比如 旅行家,NULL 是旅行家所有视频总和
- ROLLUP 就是自动加"合计"行,像收银员给你小票加个"总计"

- 前端展示可以使用的统计数据
with tmp as(
SELECT creator_name, category,
SUM(views) as total_views,
COUNT(*) as video_count
FROM ds.videos
GROUP BY creator_name, category WITH ROLLUP)
select *
from tmp
where tmp.category is NULL;

条件聚合
- CASE WHEN 配合聚合
-- 按时长分类统计
SELECT creator_name,
SUM(CASE WHEN duration < 300 THEN views ELSE 0 END) as short_video_views,
SUM(CASE WHEN duration >= 300 AND duration < 600 THEN views ELSE 0 END) as medium_video_views,
SUM(CASE WHEN duration >= 600 THEN views ELSE 0 END) as long_video_views,
COUNT(CASE WHEN likes/views > 0.05 THEN 1 END) as high_engagement_videos
FROM videos
GROUP BY creator_name;
- 过滤聚合
-- 使用 FILTER 子句(注意: MySQL不支持,postgresql支持)
SELECT category,
COUNT(*) as total_videos,
COUNT(*) FILTER (WHERE views > 10000) as popular_videos,
AVG(views) FILTER (WHERE duration > 300) as avg_long_video_views
FROM videos
GROUP BY category;
高级分组查询
- HAVING 与复杂条件
-- 找出平均播放量高且视频数量多的创作者
SELECT creator_name,
COUNT(*) as video_count,
AVG(views) as avg_views,
SUM(likes) as total_likes
FROM videos
GROUP BY creator_name
HAVING COUNT(*) >= 2
AND AVG(views) > 50000
AND SUM(likes)/SUM(views) > 0.01;
- 子查询配合聚合
-- 找出播放量超过分类平均值的视频
SELECT v.creator_name, v.category, v.views,
cat_avg.avg_views
FROM videos v
JOIN (
SELECT category, AVG(views) as avg_views
FROM videos
GROUP BY category
) cat_avg ON v.category = cat_avg.category
WHERE v.views > cat_avg.avg_views;
高级分析函数
- 相关性分析
-- 计算点赞率和分享率的相关性
SELECT category,
COUNT(*) as sample_size,
STDDEV(views) as views_std_dev, -- STDDEV(标准差) 算数据的“分散程度”,一组数字(比如浏览量)离平均值有多远,值越大表示数据波动越大
VAR_POP(duration) as duration_variance
FROM videos
WHERE views > 0
GROUP BY category;
时间序列聚合
- 时间窗口分析
-- 每月发布视频统计
SELECT DATE_FORMAT(publish_time, '%Y-%m') as month,
COUNT(*) as videos_count,
AVG(views) as avg_views,
SUM(views) as total_views,
LAG(SUM(views)) OVER (ORDER BY DATE_FORMAT(publish_time, '%Y-%m')) as prev_month_views
FROM videos
GROUP BY DATE_FORMAT(publish_time, '%Y-%m')
ORDER BY month;
- 滑动窗口计算
-- 7天滑动平均播放量
SELECT publish_time,
AVG(views) OVER (
ORDER BY publish_time
RANGE BETWEEN INTERVAL 7 DAY PRECEDING AND CURRENT ROW
) as week_moving_avg
FROM videos
ORDER BY publish_time;
5. 复杂连接查询
- 基础连接(左连接、右连接、内连接、外连接)
- 自连接
-- 找出同一创作者中播放量差异较大的视频对
SELECT v1.video_id as video1_id, v1.views as views1,
v2.video_id as video2_id, v2.views as views2,
v1.creator_name,
ABS(v1.views - v2.views) as views_diff
FROM videos v1
JOIN videos v2 ON v1.creator_id = v2.creator_id
WHERE v1.video_id < v2.video_id -- 避免重复比较
AND ABS(v1.views - v2.views) > 5000;
6. 高级字符串处理
核心函数
- 基础操作
-- 字符串拼接与截取
SELECT
CONCAT(creator_name, '-', category) AS display_name,
SUBSTRING(creator_name, 1, 3) AS prefix,
LEFT(creator_name, 5) AS short_name,
SUBSTRING_INDEX(creator_name, ' ', 1) AS first_name
FROM videos;
-- 长度与清理
SELECT
TRIM(creator_name) AS clean_name,
LENGTH(creator_name) AS byte_len,
CHAR_LENGTH(creator_name) AS char_len,
UPPER(category) AS category_upper
FROM videos;
- 查找替换
-- 字符串替换与定位
SELECT
REPLACE(category, '_', ' ') AS formatted_category,
LOCATE(' ', creator_name) AS space_pos,
REVERSE(creator_name) AS reversed_name
FROM videos;
模式匹配
- LIKE与通配符
-- 模糊匹配
SELECT * FROM videos WHERE creator_name LIKE '段%';
SELECT * FROM videos WHERE category LIKE '%科技%';
- 正则表达式
-- 格式验证
SELECT creator_name FROM videos
WHERE creator_name REGEXP '^[A-Z][a-z]+ [A-Z][a-z]+$';
-- 提取替换
SELECT
REGEXP_SUBSTR(creator_name, '[A-Z][a-z]+') AS first_word,
REGEXP_REPLACE(creator_name, '[0-9]', '') AS no_numbers
FROM videos;
字符串聚合
-- 分组聚合
SELECT
category,
GROUP_CONCAT(creator_name ORDER BY views DESC) AS creators,
GROUP_CONCAT(DISTINCT creator_name SEPARATOR '|') AS unique_creators
FROM videos GROUP BY category;
条件字符串处理
SELECT
creator_name,
CASE
WHEN views > 80000 THEN CONCAT(creator_name, '(热门)')
WHEN views > 5000 THEN CONCAT(creator_name, '(推荐)')
ELSE creator_name
END AS display_creator
FROM videos;
高级技巧
- 字符串函数组合
-- 复合处理
SELECT
CONCAT(
UPPER(LEFT(creator_name, 1)),
LOWER(SUBSTRING(creator_name, 2))
) AS title_case,
SUBSTRING_INDEX(
SUBSTRING_INDEX(creator_name, ' ', 2), ' ', -1
) AS middle_name
FROM videos;
- 数据清洗
-- 标准化处理
UPDATE videos SET
creator_name = TRIM(REGEXP_REPLACE(creator_name, '\\s+', ' ')),
category = UPPER(LEFT(category, 1)) + LOWER(SUBSTRING(category, 2));
- 搜索优化
-- 创建搜索字段
ALTER TABLE videos ADD COLUMN search_text TEXT
GENERATED ALWAYS AS (
CONCAT_WS(' ', creator_name, category,
CASE WHEN views > 50000 THEN 'popular' ELSE '' END)
) STORED;
-- 创建索引
CREATE INDEX idx_creator_prefix ON videos(creator_name(10));
CREATE FULLTEXT INDEX ft_search ON videos(search_text);
实用函数
- 自定义搜索
-- 模糊搜索函数
CREATE FUNCTION search_creator(term VARCHAR(50))
RETURNS TEXT READS SQL DATA
BEGIN
RETURN (
SELECT GROUP_CONCAT(creator_name)
FROM videos
WHERE creator_name LIKE CONCAT('%', term, '%')
OR SOUNDEX(creator_name) = SOUNDEX(term)
);
END;
7. 高级日期时间处理
核心函数
- 获取当前时间
SELECT
NOW(), -- '2018-04-10 13:26:27'
UNIX_TIMESTAMP(),-- '1752935187'
CURRENT_TIME(), -- '13:26:27'
CURDATE(), -- '2018-04-10'
CURRENT_DATE(),-- '2018-04-10'
CURRENT_TIMESTAMP(),-- '2018-04-10 13:26:27'
CURTIME(), -- '13:26:27'
CURRENT_USER() -- 'root@%'
FROM DUAL;
- 日期提取
-- 从发布时间提取各部分
SELECT
video_id,
publish_time,
YEAR(publish_time) AS publish_year,
EXTRACT(YEAR FROM publish_time) AS pub_year,
MONTH(publish_time) AS publish_month,
EXTRACT(MONTH FROM publish_time) AS pub_mth,
DAY(publish_time) AS publish_day,
EXTRACT(DAY FROM publish_time) AS pub_day,
HOUR(publish_time) AS publish_hour,
EXTRACT(HOUR FROM publish_time) AS pub_hour,
WEEKDAY(publish_time) AS weekday,
DAYNAME(publish_time) AS day_name
FROM videos;
日期格式化
-- 格式化显示
SELECT
video_id,
DATE_FORMAT(publish_time, '%Y-%m-%d') AS publish_date,
DATE_FORMAT(publish_time, '%H:%i:%s') AS publish_time_only,
DATE_FORMAT(publish_time, '%Y年%m月%d日 %H:%i') AS chinese_format,
DATE_FORMAT(publish_time, '%W, %M %D, %Y') AS full_format
FROM videos;
日期计算
-- 计算发布时间到现在的间隔
SELECT
video_id,
creator_name,
publish_time,
DATEDIFF(NOW(), publish_time) AS days_ago,
TIMESTAMPDIFF(HOUR, publish_time, NOW()) AS hours_ago,
TIMESTAMPDIFF(MINUTE, publish_time, NOW()) AS minutes_ago
FROM videos;
-- 日期加减
SELECT
video_id,
publish_time,
DATE_ADD(publish_time, INTERVAL 7 DAY) AS week_later,
DATE_SUB(publish_time, INTERVAL 1 MONTH) AS month_before,
publish_time + INTERVAL 1 YEAR AS next_year,
publish_time - INTERVAL 2 HOUR AS two_hours_before
FROM videos;
时间段分析
-- 按年月分组统计
SELECT
YEAR(publish_time) AS year,
MONTH(publish_time) AS month,
COUNT(*) AS video_count,
AVG(views) AS avg_views,
SUM(likes) AS total_likes
FROM videos
GROUP BY YEAR(publish_time), MONTH(publish_time)
ORDER BY year DESC, month DESC;
-- 按小时分析发布规律
SELECT
HOUR(publish_time) AS hour,
COUNT(*) AS video_count,
ROUND(COUNT(*) * 100.0 / (SELECT COUNT(*) FROM videos), 2) AS percentage
FROM videos
GROUP BY HOUR(publish_time)
ORDER BY hour;
-- 工作日周末分析
SELECT
CASE
WHEN WEEKDAY(publish_time) IN (5, 6) THEN '周末'
ELSE '工作日'
END AS day_type,
COUNT(*) AS video_count,
AVG(views) AS avg_views,
AVG(likes) AS avg_likes
FROM videos
GROUP BY day_type;
时间区间查询
-- 最近7天的视频
SELECT * FROM videos
WHERE publish_time >= DATE_SUB(NOW(), INTERVAL 2 DAY);
-- 本月发布的视频
SELECT * FROM videos
WHERE YEAR(publish_time) = YEAR(NOW())
AND MONTH(publish_time) = MONTH(NOW());
-- 指定时间段
SELECT * FROM videos
WHERE publish_time BETWEEN '2018-01-01' AND '2018-12-31';
-- 根据播放量计算热门时间段
SELECT
DATE_FORMAT(publish_time, '%Y-%m') AS month,
COUNT(*) AS video_count,
AVG(views) AS avg_views,
CASE
WHEN AVG(views) > 100000 THEN '热门月份'
WHEN AVG(views) > 50000 THEN '普通月份'
ELSE '冷门月份'
END AS month_type
FROM videos
GROUP BY DATE_FORMAT(publish_time, '%Y-%m')
ORDER BY avg_views DESC;
时长处理
- 时长格式化
-- 将秒数转换为时分秒格式
SELECT
video_id,
duration,
CONCAT(
FLOOR(duration / 3600), ':',
LPAD(FLOOR((duration % 3600) / 60), 2, '0'), ':',
LPAD(duration % 60, 2, '0')
) AS duration_formatted,
CASE
WHEN duration < 60 THEN '短视频'
WHEN duration < 600 THEN '中等视频'
ELSE '长视频'
END AS video_type
FROM videos;
- 时长统计
-- 时长分析
SELECT
CASE
WHEN duration < 60 THEN '0-1分钟'
WHEN duration < 300 THEN '1-5分钟'
WHEN duration < 600 THEN '5-10分钟'
WHEN duration < 1800 THEN '10-30分钟'
ELSE '30分钟以上'
END AS duration_range,
COUNT(*) AS video_count,
AVG(views) AS avg_views,
AVG(likes) AS avg_likes
FROM videos
GROUP BY duration_range
ORDER BY AVG(views) DESC;
时间序列分析
趋势分析
-- 按周统计发布趋势
SELECT
YEAR(publish_time) AS year,
WEEK(publish_time) AS week,
COUNT(*) AS video_count,
AVG(views) AS avg_views,
LAG(COUNT(*)) OVER (ORDER BY YEAR(publish_time), WEEK(publish_time)) AS prev_week_count
FROM videos
GROUP BY YEAR(publish_time), WEEK(publish_time)
ORDER BY year, week;
-- 累计统计
SELECT
DATE_FORMAT(publish_time, '%Y-%m') AS month,
COUNT(*) AS monthly_count,
SUM(COUNT(*)) OVER (ORDER BY DATE_FORMAT(publish_time, '%Y-%m')) AS cumulative_count
FROM videos
GROUP BY DATE_FORMAT(publish_time, '%Y-%m')
ORDER BY month;
实用查询模式
热门时间段识别
-- 找出最佳发布时间
SELECT
HOUR(publish_time) AS best_hour,
DAYNAME(publish_time) AS best_day,
AVG(views) AS avg_views,
AVG(likes) AS avg_likes,
COUNT(*) AS sample_count
FROM videos
WHERE publish_time >= DATE_SUB(NOW(), INTERVAL 3 MONTH)
GROUP BY HOUR(publish_time), DAYNAME(publish_time)
HAVING COUNT(*) >= 5
ORDER BY avg_views DESC
LIMIT 10;
复合时间查询
创建时间维度视图
-- 时间分析视图
CREATE VIEW video_time_analysis AS
SELECT
video_id,
creator_name,
category,
publish_time,
views,
likes,
-- 时间维度
YEAR(publish_time) AS publish_year,
QUARTER(publish_time) AS publish_quarter,
MONTH(publish_time) AS publish_month,
WEEK(publish_time) AS publish_week,
DAYOFWEEK(publish_time) AS publish_weekday,
HOUR(publish_time) AS publish_hour,
-- 时间计算
DATEDIFF(NOW(), publish_time) AS days_since_publish,
TIMESTAMPDIFF(HOUR, publish_time, NOW()) AS hours_since_publish,
-- 时间分类
CASE
WHEN HOUR(publish_time) BETWEEN 6 AND 11 THEN '上午'
WHEN HOUR(publish_time) BETWEEN 12 AND 17 THEN '下午'
WHEN HOUR(publish_time) BETWEEN 18 AND 23 THEN '晚上'
ELSE '深夜'
END AS time_period,
CASE
WHEN WEEKDAY(publish_time) IN (5, 6) THEN '周末'
ELSE '工作日'
END AS day_type
FROM videos;
8. 存储过程与函数
存储函数
基础计算函数
-- 计算互动率
DELIMITER $$
CREATE FUNCTION calculate_engagement_rate(p_views INT, p_likes INT, p_comments INT, p_shares INT)
RETURNS DECIMAL(5,2)
READS SQL DATA
DETERMINISTIC
BEGIN
DECLARE engagement_rate DECIMAL(5,2);
IF p_views = 0 OR p_views IS NULL THEN
RETURN 0;
END IF;
SET engagement_rate = ((p_likes + p_comments + p_shares) / p_views) * 100;
RETURN engagement_rate;
END$$
DELIMITER ;
-- 使用函数
SELECT
video_id,
creator_name,
views,
calculate_engagement_rate(views, likes, comments, shares) AS engagement_rate
FROM videos;
查看代码
-- 视频热度等级
DELIMITER $$
CREATE FUNCTION get_video_grade(p_views INT, p_likes INT)
RETURNS VARCHAR(10)
READS SQL DATA
DETERMINISTIC
BEGIN
DECLARE grade VARCHAR(10);
DECLARE engagement_score INT;
SET engagement_score = p_views + (p_likes * 10);
CASE
WHEN engagement_score >= 1000000 THEN SET grade = 'S级';
WHEN engagement_score >= 500000 THEN SET grade = 'A级';
WHEN engagement_score >= 100000 THEN SET grade = 'B级';
WHEN engagement_score >= 10000 THEN SET grade = 'C级';
ELSE SET grade = 'D级';
END CASE;
RETURN grade;
END$$
DELIMITER ;
时长格式化函数
-- 格式化视频时长
DELIMITER $$
CREATE FUNCTION format_duration(p_duration INT)
RETURNS VARCHAR(20)
DETERMINISTIC
BEGIN
DECLARE hours INT;
DECLARE minutes INT;
DECLARE seconds INT;
DECLARE result VARCHAR(20);
SET hours = FLOOR(p_duration / 3600);
SET minutes = FLOOR((p_duration % 3600) / 60);
SET seconds = p_duration % 60;
IF hours > 0 THEN
SET result = CONCAT(hours, ':', LPAD(minutes, 2, '0'), ':', LPAD(seconds, 2, '0'));
ELSE
SET result = CONCAT(minutes, ':', LPAD(seconds, 2, '0'));
END IF;
RETURN result;
END$$
DELIMITER ;
存储过程
-- 创作者统计分析
DELIMITER $$
CREATE PROCEDURE analyze_creator_performance(IN p_creator_id INT)
BEGIN
DECLARE v_total_videos INT;
DECLARE v_total_views BIGINT;
DECLARE v_avg_engagement DECIMAL(5,2);
-- 统计视频数量
SELECT COUNT(*) INTO v_total_videos
FROM videos WHERE creator_id = p_creator_id;
-- 统计总播放量
SELECT SUM(views) INTO v_total_views
FROM videos WHERE creator_id = p_creator_id;
-- 计算平均互动率
SELECT AVG(calculate_engagement_rate(views, likes, comments, shares))
INTO v_avg_engagement
FROM videos WHERE creator_id = p_creator_id;
-- 返回结果
SELECT
p_creator_id AS creator_id,
v_total_videos AS total_videos,
v_total_views AS total_views,
v_avg_engagement AS avg_engagement_rate,
CASE
WHEN v_avg_engagement >= 5.0 THEN '优秀'
WHEN v_avg_engagement >= 3.0 THEN '良好'
WHEN v_avg_engagement >= 1.0 THEN '一般'
ELSE '较差'
END AS performance_level;
END$$
DELIMITER ;
-- 调用过程
CALL analyze_creator_performance(1001);
数据更新过程
-- 批量更新视频等级
DELIMITER $$
CREATE PROCEDURE update_video_grades()
BEGIN
DECLARE done INT DEFAULT FALSE;
DECLARE v_video_id INT;
DECLARE v_views INT;
DECLARE v_likes INT;
DECLARE v_grade VARCHAR(10);
DECLARE video_cursor CURSOR FOR
SELECT video_id, views, likes FROM videos;
DECLARE CONTINUE HANDLER FOR NOT FOUND SET done = TRUE;
-- 添加等级列(如果不存在)
SET @sql = 'ALTER TABLE videos ADD COLUMN IF NOT EXISTS grade VARCHAR(10)';
PREPARE stmt FROM @sql;
EXECUTE stmt;
DEALLOCATE PREPARE stmt;
OPEN video_cursor;
read_loop: LOOP
FETCH video_cursor INTO v_video_id, v_views, v_likes;
IF done THEN
LEAVE read_loop;
END IF;
SET v_grade = get_video_grade(v_views, v_likes);
UPDATE videos
SET grade = v_grade
WHERE video_id = v_video_id;
END LOOP;
CLOSE video_cursor;
SELECT CONCAT('更新完成,共处理 ', ROW_COUNT(), ' 条记录') AS result;
END$$
DELIMITER ;
分页查询过程
-- 分页获取视频列表
DELIMITER $$
CREATE PROCEDURE get_videos_page(
IN p_page_num INT,
IN p_page_size INT,
IN p_category VARCHAR(20),
IN p_order_by VARCHAR(20)
)
BEGIN
DECLARE v_offset INT;
DECLARE v_sql TEXT;
SET v_offset = (p_page_num - 1) * p_page_size;
SET v_sql = 'SELECT
video_id,
creator_name,
category,
views,
likes,
comments,
format_duration(duration) AS duration_formatted,
calculate_engagement_rate(views, likes, comments, shares) AS engagement_rate,
publish_time
FROM videos WHERE 1=1';
IF p_category IS NOT NULL AND p_category != '' THEN
SET v_sql = CONCAT(v_sql, ' AND category = ''', p_category, '''');
END IF;
CASE p_order_by
WHEN 'views' THEN SET v_sql = CONCAT(v_sql, ' ORDER BY views DESC');
WHEN 'likes' THEN SET v_sql = CONCAT(v_sql, ' ORDER BY likes DESC');
WHEN 'publish_time' THEN SET v_sql = CONCAT(v_sql, ' ORDER BY publish_time DESC');
ELSE SET v_sql = CONCAT(v_sql, ' ORDER BY video_id DESC');
END CASE;
SET v_sql = CONCAT(v_sql, ' LIMIT ', p_page_size, ' OFFSET ', v_offset);
SET @sql = v_sql;
PREPARE stmt FROM @sql;
EXECUTE stmt;
DEALLOCATE PREPARE stmt;
END$$
DELIMITER ;
-- 调用分页查询
CALL get_videos_page(1, 10, 'tech', 'views');
条件控制与循环
复杂业务逻辑处理
-- 视频推荐算法
DELIMITER $$
CREATE PROCEDURE recommend_videos(
IN p_user_category VARCHAR(20),
IN p_min_views INT,
OUT p_recommended_count INT
)
BEGIN
DECLARE v_count INT DEFAULT 0;
DROP TEMPORARY TABLE IF EXISTS temp_recommendations;
CREATE TEMPORARY TABLE temp_recommendations (
video_id INT,
creator_name VARCHAR(50),
score DECIMAL(10,2)
);
-- 基于分类推荐
IF p_user_category IS NOT NULL THEN
INSERT INTO temp_recommendations
SELECT
video_id,
creator_name,
(views * 0.6 + likes * 0.3 + comments * 0.1) as score
FROM videos
WHERE category = p_user_category
AND views >= p_min_views
ORDER BY score DESC
LIMIT 20;
SET v_count = ROW_COUNT();
END IF;
-- 如果推荐数量不足,补充热门视频
IF v_count < 10 THEN
INSERT INTO temp_recommendations
SELECT
video_id,
creator_name,
(views * 0.6 + likes * 0.3 + comments * 0.1) as score
FROM videos
WHERE video_id NOT IN (SELECT video_id FROM temp_recommendations)
AND views >= p_min_views
ORDER BY score DESC
LIMIT (10 - v_count);
END IF;
SELECT * FROM temp_recommendations ORDER BY score DESC;
SELECT COUNT(*) INTO p_recommended_count FROM temp_recommendations;
DROP TEMPORARY TABLE temp_recommendations;
END$$
DELIMITER ;
-- 调用推荐过程
SET @rec_count = 0;
CALL recommend_videos('tech', 1000, @rec_count);
SELECT @rec_count AS total_recommendations;
异常处理
带异常处理的数据操作
-- 安全的数据更新过程
DELIMITER $$
CREATE PROCEDURE safe_update_video_stats(
IN p_video_id INT,
IN p_new_views INT,
IN p_new_likes INT,
OUT p_success BOOLEAN,
OUT p_message VARCHAR(200)
)
BEGIN
DECLARE EXIT HANDLER FOR SQLEXCEPTION
BEGIN
ROLLBACK;
GET DIAGNOSTICS CONDITION 1
p_message = MESSAGE_TEXT;
SET p_success = FALSE;
END;
START TRANSACTION;
-- 验证输入参数
IF p_video_id IS NULL OR p_new_views < 0 OR p_new_likes < 0 THEN
SET p_success = FALSE;
SET p_message = '参数错误:视频ID不能为空,播放量和点赞数不能为负数';
ROLLBACK;
ELSE
-- 检查视频是否存在
IF NOT EXISTS(SELECT 1 FROM videos WHERE video_id = p_video_id) THEN
SET p_success = FALSE;
SET p_message = CONCAT('视频ID ', p_video_id, ' 不存在');
ROLLBACK;
ELSE
-- 执行更新
UPDATE videos
SET views = p_new_views,
likes = p_new_likes
WHERE video_id = p_video_id;
COMMIT;
SET p_success = TRUE;
SET p_message = '更新成功';
END IF;
END IF;
END$$
DELIMITER ;
-- 调用安全更新过程
SET @success = FALSE;
SET @msg = '';
CALL safe_update_video_stats(1001, 50000, 2000, @success, @msg);
SELECT @success AS is_success, @msg AS message;
实用工具过程
数据备份与清理
-- 数据归档过程
DELIMITER $$
CREATE PROCEDURE archive_old_videos(IN p_days_old INT)
BEGIN
DECLARE v_archive_count INT;
-- 创建归档表(如果不存在)
CREATE TABLE IF NOT EXISTS videos_archive LIKE videos;
-- 将旧数据移动到归档表
INSERT INTO videos_archive
SELECT * FROM videos
WHERE publish_time < DATE_SUB(NOW(), INTERVAL p_days_old DAY);
SET v_archive_count = ROW_COUNT();
-- 删除原表中的旧数据
DELETE FROM videos
WHERE publish_time < DATE_SUB(NOW(), INTERVAL p_days_old DAY);
SELECT CONCAT('已归档 ', v_archive_count, ' 条记录') AS result;
END$$
DELIMITER ;
统计报表生成
-- 生成统计报表
DELIMITER $$
CREATE PROCEDURE generate_stats_report()
BEGIN
-- 总体统计
SELECT
'总体统计' AS report_type,
COUNT(*) AS total_videos,
SUM(views) AS total_views,
SUM(likes) AS total_likes,
AVG(calculate_engagement_rate(views, likes, comments, shares)) AS avg_engagement
FROM videos
UNION ALL
-- 分类统计
SELECT
CONCAT('分类-', category) AS report_type,
COUNT(*) AS total_videos,
SUM(views) AS total_views,
SUM(likes) AS total_likes,
AVG(calculate_engagement_rate(views, likes, comments, shares)) AS avg_engagement
FROM videos
GROUP BY category
UNION ALL
-- 创作者TOP5
SELECT
CONCAT('创作者-', creator_name) AS report_type,
COUNT(*) AS total_videos,
SUM(views) AS total_views,
SUM(likes) AS total_likes,
AVG(calculate_engagement_rate(views, likes, comments, shares)) AS avg_engagement
FROM videos
GROUP BY creator_name
ORDER BY total_views DESC
LIMIT 5;
END$$
DELIMITER ;
调试与管理
查看存储过程状态
-- 查看已创建的存储过程和函数
SHOW PROCEDURE STATUS WHERE Db = DATABASE();
SHOW FUNCTION STATUS WHERE Db = DATABASE();
-- 查看具体定义
SHOW CREATE PROCEDURE analyze_creator_performance;
SHOW CREATE FUNCTION calculate_engagement_rate;
-- 删除存储过程和函数
DROP PROCEDURE IF EXISTS analyze_creator_performance;
DROP FUNCTION IF EXISTS calculate_engagement_rate;
9. 性能优化技巧
慢查询分析
-- 开启慢查询日志
SET GLOBAL slow_query_log = 'ON';
SET GLOBAL long_query_time = 2;
-- 查看慢查询状态
SHOW GLOBAL STATUS LIKE 'Slow_queries';
-- 分析具体查询性能
explain SELECT DISTINCT
v1.creator_name as creator,
v1.category,
COUNT(v1.video_id) as video_count
FROM videos v1
WHERE YEAR(v1.publish_time) = 2018
AND MONTH(v1.publish_time) = 1
GROUP BY v1.creator_id, v1.creator_name, v1.category
ORDER BY COUNT(v1.video_id) DESC;
-- 使用EXPLAIN FORMAT=JSON获取详细信息
EXPLAIN FORMAT=JSON
SELECT DISTINCT
v1.creator_name as creator,
v1.category,
COUNT(v1.video_id) as video_count
FROM videos v1
WHERE YEAR(v1.publish_time) = 2018
AND MONTH(v1.publish_time) = 1
GROUP BY v1.creator_id, v1.creator_name, v1.category
ORDER BY COUNT(v1.video_id) DESC;
表结构优化
数据类型优化
-- 优化数据类型选择
ALTER TABLE videos
MODIFY COLUMN creator_id INT UNSIGNED,
MODIFY COLUMN views INT UNSIGNED,
MODIFY COLUMN likes MEDIUMINT UNSIGNED,
MODIFY COLUMN comments MEDIUMINT UNSIGNED,
MODIFY COLUMN shares SMALLINT UNSIGNED,
MODIFY COLUMN duration SMALLINT UNSIGNED;
-- 使用ENUM替代VARCHAR(如果值固定)
ALTER TABLE videos
ADD COLUMN status ENUM('draft', 'published', 'archived', 'deleted')
DEFAULT 'published';
垂直分割
-- 将不常用字段分离到扩展表
CREATE TABLE video_extended (
video_id INT PRIMARY KEY,
description TEXT,
tags TEXT,
thumbnail_url VARCHAR(255),
processing_info JSON,
FOREIGN KEY (video_id) REFERENCES videos(video_id)
);
-- 热点数据表
CREATE TABLE video_stats (
video_id INT PRIMARY KEY,
views INT UNSIGNED,
likes MEDIUMINT UNSIGNED,
comments MEDIUMINT UNSIGNED,
shares SMALLINT UNSIGNED,
last_updated TIMESTAMP DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
FOREIGN KEY (video_id) REFERENCES videos(video_id)
);
配置优化
-- 查看当前配置
SHOW VARIABLES LIKE 'innodb_buffer_pool_size';
SHOW VARIABLES LIKE 'query_cache%';
SHOW VARIABLES LIKE 'tmp_table_size';
-- 建议配置(需要修改my.cnf)
/*
[mysqld]
# InnoDB缓冲池大小(物理内存的70-80%)
innodb_buffer_pool_size = 2G
# 查询缓存
query_cache_type = 1
query_cache_size = 256M
# 临时表大小
tmp_table_size = 64M
max_heap_table_size = 64M
# 连接数
max_connections = 200
max_connect_errors = 1000
# 日志设置
slow_query_log = 1
long_query_time = 2
*/
【待完善】
索引策略: 复合索引、覆盖索引、前缀索引
查询优化: 避免函数、优化JOIN、改写子查询
表结构: 合适的数据类型、垂直/水平分割
配置调优: 缓冲池、连接数、慢查询
缓存机制: 查询缓存、结果缓存、汇总表
批量操作: 批量插入/更新、事务控制
监控诊断: 性能监控、索引分析、碎片清理
分区策略: 时间分区、哈希分区、范围分区
参考:http://spring.io/guides/gs/rest-service-cors/



浙公网安备 33010602011771号