1、模仿建表语句,创建subject表,并使用hdfs dfs -put 命令加载数据
CREATE TABLE `subject`(
`subject_id` bigint COMMENT '科目id',
`subject_name` string COMMENT '科目名称')
COMMENT '科目表'
ROW FORMAT DELIMITED
FIELDS TERMINATED BY ',';
2、查询学生分数(输出:学号,姓名,班级,科目id,科目名称,成绩)
input tables: 3张表
不需要分组、聚合、排序,直接三张表关联即可
select t1.id
,t1.name
,t1.clazz
,t2.score_id
,t2.score
,t3.subject_name
from students t1
left join score t2
on t1.id = t2.id
left join subject t3
on t2.score_id = t3.subject_id;
3、查询学生总分(输出:学号,姓名,班级,总分)
input tables: students score
要求总分,得按照学生id分组求sum
不需要排序,需要关联一次
关联次数:6000次
先关联再分组求和
select t1.id
,t1.name
,t1.clazz
,sum(t2.score) as sum_score
from students t1
left join score t2
on t1.id = t2.id
group by t1.id,t1.name,t1.clazz
limit 10;
关联1000次
先分组求和,再关联
select t1.id
,t1.name
,t1.clazz
,t2.sum_score
from students t1
left join(
select id
,sum(score) sum_score
from score
group by id
) t2 on t1.id = t2.id
limit 10;
4、查询全年级总分排名前三(不分文理科)的学生(输出:学号,姓名,班级,总分)
select t1.id
,t1.name
,t1.clazz
,t2.sum_score
from students t1
left join(
select id
,sum(score) sum_score
from score
group by id
) t2 on t1.id = t2.id
order by t2.sum_score desc
limit 3;
5、查询文科一班学生总分排名前10的学生(输出:学号,姓名,班级,总分)
select t1.id
,t1.name
,t1.clazz
,t2.sum_score
from students t1
left join(
select id
,sum(score) sum_score
from score
group by id
) t2 on t1.id = t2.id
where t1.clazz = "文科一班"
order by t2.sum_score desc
limit 10;
6、查询每个班级学生总分的平均成绩(输出:班级,平均分)
select t1.clazz
,avg(t2.sum_score) avg_sum_score
from students t1
left join(
select id
,sum(score) sum_score
from score
group by id
) t2 on t1.id = t2.id
group by t1.clazz;
7、查询每个班级的最高总分(输出:班级,总分)
select t1.clazz
,max(t2.sum_score) max_sum_score
from students t1
left join(
select id
,sum(score) sum_score
from score
group by id
) t2 on t1.id = t2.id
group by t1.clazz;
8、(思考)查询每个班级总分排名前三的学生(输出:学号,姓名,班级,总分)
窗口函数:row_number()
select ttt1.name
,ttt1.score_sum
,ttt1.clazz
,ttt1.rn
from(
select name
,score_sum
,clazz
,row_number() over(partition by clazz order by score_sum desc) as rn
from (
select t1.id
,t1.name
,t1.clazz
,t2.score_sum
from students t1
left join (
select id
,sum(score) as score_sum
from score
group by id
) t2 on t1.id = t2.id
) tt1
) ttt1 where ttt1.rn <=3;
create table students(
> id bigint comment '学生 id',
> name string comment '学生姓名',
> age int comment '学生年龄',
> gender string comment '学生性别',
> clazz string comment '学生班级'
> ) comment '学生信息表'
> ROW FORMAT DELIMITED FIELDS TERMINATED BY ',';
create table score(
> id bigint comment '学生 id',
> score_id bigint comment '科目 id',
> score int comment '学生成绩'
> ) comment '学生成绩表'
> ROW FORMAT DELIMITED FIELDS TERMINATED BY ',';
create table subject(
> subject_id bigint comment '科目id',
> subject_name string comment '科目名称'
> )comment '科目目表'
> ROW FORMAT DELIMITED FIELDS TERMINATED BY ',';