Hive Sql
1.创建复杂类型sql表
CREATE TABLE `person`( `name` string COMMENT 'name', `work_locations` array<string> COMMENT 'work_locations', `score` map<string,int> COMMENT 'score', `course` struct<course:string,score:int> COMMENT 'course') ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t' COLLECTION ITEMS TERMINATED BY ',' MAP KEYS TERMINATED BY ':' STORED AS INPUTFORMAT 'org.apache.hadoop.mapred.TextInputFormat' OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' LOCATION 'hdfs://x/user/yy/tmp.db/person' TBLPROPERTIES ( 'transient_lastDdlTime'='1646984872')
LOAD DATA LOCAL INPATH '/home/hadoop/person.txt' OVERWRITE INTO TABLE person;
2.literal explode
select
dt
,count(distinct id) as uv
,count(1) as pv
from
(
select
dt
,id
,pr
from t1
where dt >= '02-31' and dt <= '10-01'
and eid = 'XXX'
and split(pr,'#')[0] in ('qqq')
)table_a LATERAL VIEW explode(split(pr, '_')) ad as v
where split(v,'\\$')[1] in ('12','23')
group by dt
3.合并小文件
1.合并csv 小文件: hadoop fs -getmerge /user//part-* a.txt 2.中文显示乱码 iconv -f UTF-8 -t gb18030 a.txt -o ./res.csv
https://blog.csdn.net/u010670689/article/details/72885944

浙公网安备 33010602011771号