# hive中同列多行数据组合的方法以及array to string要点(行转列)

1. 同列多行数据组合成一个字段cell的方法， top N 问题的hive方案 如下：

hive 列转行 to json与to array  list set等复杂结构，hive topN的提取的窗口统计方法

select
ll,
collect_list(n) ,  -- 将topN 转换成 List or Json with the help of collect_set(xx) collect_list(xx)
collect_list(nn),
collect_list(ll),
collect_list(dd)
from
(
select
concat('\'', n, '\'') as nn,
n                          ,
ll                         ,
concat_ws(":", concat('\\\'', n, '\\\''), ll) as dd ,

row_number() over (partition by ll order by n desc ) as num1     -- 某用户的所有文章点击率排序
from
(
select 1 as n, '4' as ll

UNION all

SELECT 2 as n, '4' as ll

UNION all

select 3 as n, '5' as ll

UNION all

SELECT 4 as n, '5' as ll
UNION all

SELECT 5 as n, '4' as ll

UNION all

select 6 as n, '5' as ll

UNION all

SELECT 7 as n, '5' as ll

UNION all

SELECT 8 as n, '4' as ll

UNION all

select 9 as n, '5' as ll

UNION all

SELECT 10 as n, '5' as ll
)
a

)
c
where num1 <= 3      -- 筛选top 3
group by
ll

2. 建表存储list类型数据的方法以及注意点

CREATE TABLE if not exists celebrity_basic_info
(
author_id bigint COMMENT 'id',

area array<string> COMMENT '复杂类型的数据'

) COMMENT '-----'
PARTITIONED BY
(
dt string
)

ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t' COLLECTION ITEMS TERMINATED BY ',' STORED AS textfile ;  --这里要注意存储时的序列化转换 collection item 的分隔符。

3. 将array<string>类型数据转成string显示的方法

select
author_id       ,
concat( case when size(area)=-1 then '[' else '["' end,concat_ws('","'  , area )            , case when size(area)=-1 then ']' else '"]' end)  --组装拼接成json list
from  celebrity_basic_info

posted @ 2016-12-22 20:00  fandyst  阅读(15675)  评论(0编辑  收藏  举报