hive_非常用

1.count(distinct xxx) 窗口函数    

hive 2.x版本支持:

  count(distinct cust_num) over(partition by xxx order by xxx)     -- 分组内去重求和

hive1.x版本不支持:改版

 size(collect_set(cust_num) over(partition by xxx order by xxx)) -- 分组内去重求和

 2.

collect_set : set集合,没有重复元素

collect_list :list列表,可以有重复元素

select collect_list(value)
from 
(
select 1 as id,1 as value from dual 
union all 
select 1 as id,3 as value from dual 
union all 
select 1 as id,2 as value from dual 
union all 
select 1 as id,2 as value from dual 
) t 
group by id;

[1,3,2,2]
select collect_set(value)
from 
(
select 1 as id,1 as value from dual 
union all 
select 1 as id,3 as value from dual 
union all 
select 1 as id,2 as value from dual 
union all 
select 1 as id,2 as value from dual 
) t 
group by id;

[1,3,2]

 

3.排序

sort_array

select sort_array(collect_set(value))
from 
(
select 1 as id,1 as value from dual 
union all 
select 1 as id,3 as value from dual 
union all 
select 1 as id,2 as value from dual 
union all 
select 1 as id,2 as value from dual 
) t 
group by id;

[1,2,3]

4.集合元素连接:

select concat_ws('-','1','2','3')
1-2-3
select concat_ws('-',collect_set(cast(value as string)))
from 
(
select 1 as id,1 as value from dual 
union all 
select 1 as id,3 as value from dual 
union all 
select 1 as id,2 as value from dual 
union all 
select 1 as id,2 as value from dual 
) t 
group by id;

1-3-2

 

posted @ 2019-06-28 14:31  问题不大1  阅读(357)  评论(0编辑  收藏  举报