hive_非常用

1.count(distinct xxx) 窗口函数

hive 2.x版本支持：

count(distinct cust_num) over(partition by xxx order by xxx) -- 分组内去重求和

hive1.x版本不支持：改版

size(collect_set(cust_num) over(partition by xxx order by xxx)) -- 分组内去重求和

collect_set ： set集合，没有重复元素

collect_list ：list列表，可以有重复元素

select collect_list(value)
from 
(
select 1 as id,1 as value from dual 
union all 
select 1 as id,3 as value from dual 
union all 
select 1 as id,2 as value from dual 
union all 
select 1 as id,2 as value from dual 
) t 
group by id;

[1,3,2,2]

select collect_set(value)
from 
(
select 1 as id,1 as value from dual 
union all 
select 1 as id,3 as value from dual 
union all 
select 1 as id,2 as value from dual 
union all 
select 1 as id,2 as value from dual 
) t 
group by id;

[1,3,2]

3.排序

sort_array

select sort_array(collect_set(value))
from 
(
select 1 as id,1 as value from dual 
union all 
select 1 as id,3 as value from dual 
union all 
select 1 as id,2 as value from dual 
union all 
select 1 as id,2 as value from dual 
) t 
group by id;

[1,2,3]

4.集合元素连接：

select concat_ws('-','1','2','3')
1-2-3

select concat_ws('-',collect_set(cast(value as string)))
from 
(
select 1 as id,1 as value from dual 
union all 
select 1 as id,3 as value from dual 
union all 
select 1 as id,2 as value from dual 
union all 
select 1 as id,2 as value from dual 
) t 
group by id;

1-3-2

posted @ 2019-06-28 14:31 问题不大1 阅读(357) 评论(0) 编辑收藏举报

会员力量，点亮园子希望

刷新页面返回顶部

问题不大

hive_非常用

公告