hive_面试题_行转列&列转行
1.列转行
1.说明
-- 说明 : 将 一列数据 转换成一行数据 -- 使用函数 : collect_set : 返回分组内元素 的迭代器(对元素去重) collect_list : 返回分组内元素 的迭代器(对元素不去重) concat_ws('指定分隔符',iter) : 返回 将所有元素用指定分隔符拼接的字符串 类似 iter.mkString("分隔符")
2.示例
-- 数据准备 -- DDL create table logintab ( `user_id` string comment '用户id', `login_date` string comment '登入日期') comment '用户登入记录表' row format delimited fields terminated by '\t' lines terminated by '\n' stored as orc; -- DML insert overwrite table logintab select '1001' as id,'2021-12-12' as occur_date union all select '1001' as id,'2021-12-20' as occur_date union all select '1001' as id,'2022-02-10' as occur_date union all select '1001' as id,'2021-12-20' as occur_date union all select '1002' as id,'2021-12-12' as occur_date union all select '1001' as id,'2021-12-13' as occur_date union all select '1001' as id,'2021-12-13' as occur_date union all select '1001' as id,'2021-12-13' as occur_date union all select '1002' as id,'2021-12-14' as occur_date union all select '1001' as id,'2021-12-14' as occur_date union all select '1002' as id,'2021-12-15' as occur_date union all select '1001' as id,'2021-12-15' as occur_date union all select '1003' as id,'2021-12-15' as occur_date union all select '1003' as id,'2021-12-16' as occur_date union all select '1003' as id,'2021-12-17' as occur_date union all select '1003' as id,'2021-12-18' as occur_date union all select '1003' as id,'2021-12-29' as occur_date union all select '1001' as id,'2022-01-01' as occur_date union all select '1001' as id,'2022-01-01' as occur_date union all select '1001' as id,'2022-01-01' as occur_date union all select '1001' as id,'2022-01-03' as occur_date union all select '1001' as id,'2022-01-05' as occur_date union all select '1001' as id,'2022-01-06' as occur_date union all select '1003' as id,'2021-12-19' as occur_date ; -- 操作sql select user_id ,collect_list(login_date) as list1 ,collect_set(login_date) as set1 ,concat_ws('*',collect_set(login_date)) as ws_set from logintab group by user_id; -- 查询结果 user_id list1 set1 ws_set 1001 ["2021-12-12","2021-12-20","2022-02-10","2021-12-20","2021-12-13","2021-12-13","2021-12-13","2021-12-14","2021-12-15","2022-01-01","2022-01-01","2022-01-01","2022-01-03","2022-01-05","2022-01-06"] ["2021-12-12","2021-12-20","2022-02-10","2021-12-13","2021-12-14","2021-12-15","2022-01-01","2022-01-03","2022-01-05","2022-01-06"] 2021-12-12*2021-12-20*2022-02-10*2021-12-13*2021-12-14*2021-12-15*2022-01-01*2022-01-03*2022-01-05*2022-01-06 1002 ["2021-12-12","2021-12-14","2021-12-15"] ["2021-12-12","2021-12-14","2021-12-15"] 2021-12-12*2021-12-14*2021-12-15 1003 ["2021-12-15","2021-12-16","2021-12-17","2021-12-18","2021-12-29","2021-12-19"] ["2021-12-15","2021-12-16","2021-12-17","2021-12-18","2021-12-29","2021-12-19"] 2021-12-15*2021-12-16*2021-12-17*2021-12-18*2021-12-29*2021-12-19 Time taken: 9.349 seconds, Fetched: 3 row(s)
2.行转列
1.说明
-- 说明 : 将一行数据 转换成 一列数据 -- 使用函数 : explode、lateral view
2.示例
-- DDL create table logintab1 ( `user_id` string comment '用户id', `login_dates` string comment '登入日期') comment '用户登入记录表' row format delimited fields terminated by '\t' lines terminated by '\n' stored as orc; -- DML insert overwrite table logintab1 select '1001' as id,'2021-12-12*2021-12-20*2022-02-10' as occur_date union all select '1002' as id,'2021-12-12*2021-12-14*2021-12-15' as occur_date union all select '1003' as id,'2021-12-15*2021-12-16*2021-12-17*2021-12-18*2021-12-29*2021-12-19' as occur_date ; -- 执行sql select user_id ,t1.login_date from logintab1 lateral view explode(split(login_dates,'\\*')) t1 as login_date; -- 查询结果 user_id t1.login_date 1001 2021-12-12 1001 2021-12-20 1001 2022-02-10 1002 2021-12-12 1002 2021-12-14 1002 2021-12-15 1003 2021-12-15 1003 2021-12-16 1003 2021-12-17 1003 2021-12-18 1003 2021-12-29 1003 2021-12-19 Time taken: 0.032 seconds, Fetched: 12 row(s)
3.需求 : 求出uid,name, 琴_成绩,棋_成绩,书_成绩,画_成绩, 如果没有参数某一门考试,结果成绩为0
1.数据准备
-- 数据准备 create table userinfo( uid string comment '学号', name string comment '姓名', city string comment '所在城市') comment '学生基本信息表' row format delimited fields terminated by '\t' lines terminated by '\n' stored as orc; -- DDL create table scores ( uid string comment '学号', courseid string comment '课程id', score string comment '得分分数') comment '考试分数记录表' row format delimited fields terminated by '\t' lines terminated by '\n' stored as orc; -- DML insert overwrite table userinfo select '1' as uid,'刘备' as name,'保定' as city union all select '2' as uid,'关羽' as name,'山西' as city union all select '3' as uid,'赵云' as name,'常山' as city union all select '4' as uid,'张飞' as name,'涿州' as city ; insert overwrite table scores select '1' as uid,'琴' as courseid,'100' as score union all select '1' as uid,'棋' as courseid,'99' as score union all select '1' as uid,'书' as courseid,'88' as score union all select '1' as uid,'画' as courseid,'77' as score union all select '2' as uid,'琴' as courseid,'60' as score union all select '2' as uid,'棋' as courseid,'50' as score union all select '3' as uid,'琴' as courseid,'70' as score union all select '4' as uid,'琴' as courseid,'99' as score union all select '4' as uid,'棋' as courseid,'77' as score union all select '4' as uid,'书' as courseid,'100' as score union all select '4' as uid,'画' as courseid,'80' as score ; -- 数据说明 -- userinfo 记录了学生的基本信息 -- scores 记录了考试的分数信息 -- 共考试四门功课 琴、棋、书、画,如果有人没有参加某一门的考试,则scores里不会有记录
2.执行sql
--需求1: 求出uid,name, 琴_成绩,棋_成绩,书_成绩,画_成绩, 如果没有参数某一门考试,结果成绩为0 --输出结果样例 --+------+---------+------+------+------+------+--+ --| uid | name | 琴_成绩 |棋_成绩 | 书_成绩 | 画_成绩 | --+------+---------+------+------+------+------+--+ --| 1 | 刘备 | 95 | 60 | 95 | 70 | --| 2 | 关羽 | 70 | 85 | 80 | 80 | -- 执行sql select t1.uid ,name ,max(if(courseid='琴',score,0)) as `琴_成绩` ,max(if(courseid='棋',score,0)) as `棋_成绩` ,max(if(courseid='书',score,0)) as `书_成绩` ,max(if(courseid='画',score,0)) as `画_成绩` from scores t1 left outer join userinfo t2 on t1.uid = t2.uid group by t1.uid ,name ; --查询结果 t1.uid name 琴_成绩 棋_成绩 书_成绩 画_成绩 1 刘备 100 99 88 77 2 关羽 60 50 0 0 3 赵云 70 0 0 0 4 张飞 99 77 100 80