HQL高级

**--** SQL关键词执行顺序
from > where条件 > group by > having条件>select>order by>limit
注意:sql一旦出现group by,后续的关键词能够操作字段只有(分组依据字段,组函数处理结果)
# 0. 各个数据类型的字段访问(array、map、struct)
# array类型:字段名[index];map类型:字段名[key];struct类型:字段名.属性名
select name,salary,hobbies[1],cards['123456'],addr.city from t_person;
# 1. 条件查询:= != >= <=
select * from t_person where addr.city='郑州';
# 2. and or between and
# array_contains(字段,值):函数,针对array类型的字段,判断数组里面包含指定的值
select * from t_person where salary>5000 and array_contains(hobbies,'抽烟');
# 3. order by[底层会启动mapreduce进行排序]
select * from t_person order by salary desc;
# 4. limit(hive没有起始下标)
select * from t_person sort by salary desc limit 5;
# 5. 去重
select distinct addr.city  from t_person;
select distinct(addr.city) from t_person;

表连接

select ...
from table1 t1 left join  table2 t2 on 条件
where 条件
group by
having


例:--
查询性别不同,但是薪资相同的人员信息。
select 
    t1.name,t1.sex,t1.salary,
    t2.name,t2.sex,t2.salary
from t_person t1 join t_person t2 on t1.salary = t2.salary 
where t1.sex != t2.sex;

单行函数(show functions)


# 单行函数(show functions) --对一行数据进行操作
#查看所有函数
-- 查看hive系统所有函数 
show functions;
--函数的使用:函数名(参数)

1. array_contains(列,值);--判断数组列中是否包含指定的值
select name,hobbies from t_person where array_contains(hobbies,'喝酒');
2. length(列)--获取到长度
select length('123123');
3. concat(列,列)--拼接
select concat('123123','aaaa');
4. to_date('1999-9-9')--字符串转换成日期
select to_date('1999-9-9');
5. year(date)--获取日期类型的年,month(date)--获取日期类型的月份,
6. date_add(date,数字)--日期加多少天
select name,date_add(birthday,-9) from t_person;

组函数

# 组函数:聚合函数,做统计的:由多行数据计算完成之后获得一行数据
概念:
max、min、sum、avg、count等。

select max(salary) from t_person where addr.city='北京';
select count(id) from t_person;

炸裂函数(集合函数)(explode())

# 炸裂函数(集合函数)(explode()):由一行数据计算完成之后获得多行数据
-- 查询所有的爱好,
select explode(hobbies) as hobby from t_person


# lateral view 
-- 为指定表,的边缘拼接一个列。(类似表连接)
-- lateral view:为表的拼接一个列(炸裂结果)
-- 语法:from 表 lateral view explode(数组字段) 别名 as 字段名;


-- 查看id,name,爱好。一个爱好一条信息。
select id,name,hobby
from t_person lateral view explode(hobbies) t_hobby as hobby

分组


1. group by(查看各个城市的均薪)
select addr.city,avg(salary) from t_person group by addr.city;
2. having(查看平均工资超过5000的城市和均薪)
select addr.city,avg(salary) from t_person group by addr.city having avg(salary)>5000;
3. 统计各个爱好的人数
--explod+lateral view
select hobby,count( * )
from t_person lateral view explode(hobbies) t_hobby as hobby
group by hobby;
4. 统计最受欢迎的爱好TOP1
SELECT hb,count( * ) num
	from t_person lateral view explode(hobbies) h as hb
	group by hb
	order by num desc limit 1;

子查询

# 子查询
-- 统计有哪些爱好,并去重。
select distinct t.hobby from 
(select explode(hobbies) as hobby from t_person ) t
posted @ 2020-12-04 09:00  花红  阅读(119)  评论(0)    收藏  举报