HiveSQL

一、 基础查询

sublime 格式化 ctrl+k ctrl+f

-- 创建学生表
DROP TABLE IF EXISTS student_info;
create table if not exists student_info(
    stu_id string COMMENT '学生id',
    stu_name string COMMENT '学生姓名',
    birthday string COMMENT '出生日期',
    sex string COMMENT '性别'
) 
row format delimited fields terminated by ',' 
stored as textfile;

-- 创建课程表
DROP TABLE IF EXISTS course_info;
create table if not exists course_info(
    course_id string COMMENT '课程id',
    course_name string COMMENT '课程名',
    tea_id string COMMENT '任课老师id'
) 
row format delimited fields terminated by ',' 
stored as textfile;

-- 创建老师表
DROP TABLE IF EXISTS teacher_info;
create table if not exists teacher_info(
    tea_id string COMMENT '老师id',
    tea_name string COMMENT '老师姓名'
) 
row format delimited fields terminated by ',' 
stored as textfile;

-- 创建分数表
DROP TABLE IF EXISTS score_info;
create table if not exists score_info(
    stu_id string COMMENT '学生id',
    course_id string COMMENT '课程id',
    score int COMMENT '成绩'
) 
row format delimited fields terminated by ',' 
stored as textfile;


1.1 简单汇总查询

--1.查询数学成绩不及格的学生信息和其对应的数学学科成绩,按照学号升序排序
SELECT s.stu_id,
       s.stu_name,
       t1.score
FROM student_info s
JOIN
  ( SELECT *
   FROM score_info
   WHERE course_id=
       (SELECT course_id
        FROM course_info
        WHERE course_name='数学')
     AND score < 60 ) t1 ON s.stu_id = t1.stu_id
ORDER BY s.stu_id;

1.2分组查询 对分组结果条件查询

--2.查询平均成绩大于60分的学生的学号和平均成绩
select
    stu_id,
    avg(score) score_avg
from score_info
group by stu_id
having score_avg > 60;

1.3 按指定条件分组

--3.查询一共参加三门课程且其中一门为语文课程的学生的id和姓名
select
  si.stu_id,
  si.stu_name
from student_info si
         join
     (
         select stu_id,
                count(*) cc
         from score_info
         where stu_id in (select stu_id
                          from score_info
                          where course_id = (select course_id from course_info where course_name = '语文'))
         group by stu_id
         having count(*) = 3
     ) t1
on
  si.stu_id=t1.stu_id
--4.没有学全所有课的学生的学号、姓名
SELECT sti.stu_id,
       sti.stu_name,
       count(sci.course_id)
FROM student_info sti
LEFT JOIN score_info sci ON sti.stu_id = sci.stu_id
GROUP BY sti.stu_id,sti.stu_name
HAVING count(sci.course_id) < (SELECT count(*) FROM course_info)

1.4 多表join

--5.查询所有课程成绩在70分以上的学生的姓名、课程名称和分数
SELECT s.stu_id,
       s.stu_name,
       c.course_name,
       s2.score
FROM student_info s
JOIN
  ( SELECT stu_id,
           sum(if(score >= 70,0,1)) flage
   FROM score_info
   GROUP BY stu_id HAVING flage =0 ) t1 ON s.stu_id = t1.stu_id
JOIN score_info s2 ON s.stu_id = s2.stu_id
JOIN course_info c ON s2.course_id = c.course_id
ORDER BY S2.score

1.5 self join


--6.查询同一个学生的不同课程成绩相同的学生id
SELECT s1.stu_id,
       s1.course_id,
       s1.score
FROM score_info s1
JOIN score_info s2
WHERE s1.stu_id=s2.stu_id
  AND s1.score=s2.score
  AND s1.course_id<>s2.course_id
--7.查询课程编号为“01”的课程比“02”的课程成绩高的所有学生的学号
select
    s1.stu_id
from
(
    select
        sc1.stu_id,
        sc1.course_id,
        sc1.score
    from  score_info sc1
    where sc1.course_id ='01'
) s1
join
(
    select
        sc2.stu_id,
        sc2.course_id,
        score
    from score_info sc2
    where sc2.course_id ="02"
)s2
on s1.stu_id=s2.stu_id
where s1.score > s2.score;
--8.查询学过编号为“01”的课程并且也学过编号为“02”的课程的学生的学号、姓名
SELECT t1.stu_id AS `学号`,
       s.stu_name AS `姓名`
FROM
  (SELECT stu_id
   FROM score_info sc1
   WHERE sc1.course_id='01'
   AND stu_id IN
       (SELECT stu_id
        FROM score_info sc2
        WHERE sc2.course_id='02'))t1
JOIN student_info s ON t1.stu_id = s.stu_id;
--9.显示所有学生的所有课程的成绩以及平均成绩
SELECT ss.stu_id,
       ss.stu_name,
       ci.course_id,
       ci.course_name,
       s.score,
       t1.avg_score
FROM student_info ss
LEFT JOIN score_info s ON ss.stu_id = s.stu_id
LEFT JOIN course_info ci ON s.course_id = ci.course_id
LEFT JOIN
  (SELECT stu_id,
          avg(score) avg_score
   FROM score_info
   GROUP BY stu_id) t1 ON s.stu_id = t1.stu_id
ORDER BY t1.avg_score DESC;

二、 初级函数

user_id(用户id) gender(性别) birthday(生日)
sku_id(商品id) name(商品名称) category_id(分类id) from_date(上架日期) price(商品价格)
category_id(分类id) category_name(分类名称)
order_id(订单id) user_id(用户id) create_date(下单日期) total_amount(订单金额)
order_detail_id(订单明细id) order_id(订单id) sku_id(商品id) create_date(下单日期) price(商品单价) sku_num(商品件数)
user_id(用户id) ip_address(ip地址) login_ts(登录时间) logout_ts(登出时间)
sku_id(商品id) new_price(本次变更之后的价格) change_date(变更日期)
delivery_id(运单id) order_id(订单id) user_id(用户id) order_date(下单日期) custom_date(期望配送日期)
user1_id(用户1 id) user2_id(用户2 id)
user_id(用户id) sku_id(商品id) create_date(收藏日期)

三、 中级练习

四、 高级练习

--会话划分
select user_id,
       page_id,
       view_timestamp,
       concat(user_id, '-', sum(session_start_point) over (partition by user_id order by view_timestamp)) session_id
from (
         select user_id,
                page_id,
                view_timestamp,
                if(view_timestamp - lagts >= 60, 1, 0) session_start_point
         from (
                  select user_id,
                         page_id,
                         view_timestamp,
                         lag(view_timestamp, 1, 0) over (partition by user_id order by view_timestamp) lagts
                  from page_view_events
              ) t1
     ) t2;
--间隔登陆
--去重
select
userid,
login_ts
from session
group by userid,date_format(login,'yyyy-MM-ss')

--求本地登陆和上次登陆时间diff
select
userid,
lag(ts,0)over() lag
from t1

--划分session
select
if(lag>2,log,null)
from t2

--①按照sum结果分组
select
sum(if(datediff(login_date,dt)>2,1,0)) over(partition by user_id order by login_date) group  --获取大于2的标签
from t3


--②获取当前窗口最后一个数据
select
last_value(dt,true) over(partition by user_id order by login_date) group  --获取大于2的标签
from t3
--日期交叉
select
    brand,
    sum(datediff(end_date,start_date)+1) promotion_day_count
from
(
    select
        brand,
        max_end_date,
        if(max_end_date is null or start_date>max_end_date,start_date,date_add(max_end_date,1)) start_date,
        end_date
    from
    (
        select
            brand,
            start_date,
            end_date,
            max(end_date) over(partition by brand order by start_date rows between unbounded preceding and 1 preceding) max_end_date
        from promotion_info
    )t1
)t2
where end_date>start_date
group by brand;
--直播间同时在线最大人数
posted @ 2024-05-13 11:58  小花生hadoop  阅读(106)  评论(0)    收藏  举报