hive 日常技巧

 
-- Delete duplicate rows, keeping only the row with the smallest rowid
-- for each (peopleId, seq) key.
-- NOTE(review): this is Oracle syntax, not Hive — Hive has no `rowid`
-- pseudo-column, no multi-column `IN` DELETE, and (outside ACID tables)
-- no DELETE at all. On Hive, dedup by rewriting the table instead:
-- INSERT OVERWRITE ... SELECT of rows where
-- row_number() OVER (PARTITION BY peopleId, seq ORDER BY ...) = 1.
delete from vitae a
where (a.peopleId,a.seq) in   (select peopleId,seq from vitae group by peopleId,seq having count(*) > 1)
and rowid not in (select min(rowid) from vitae group by peopleId,seq having count(*)>1)
  
-- Create a partitioned table and load a file into it.
-- Stage the data file onto HDFS first:
hadoop fs -put data.txt hdfs_path

-- IF EXISTS makes the script rerunnable (plain DROP fails on first run).
DROP TABLE IF EXISTS tablename;
CREATE TABLE tablename
(
column1 string,
column2 string
)
-- Partition columns must NOT repeat the table's data columns
-- (the original `partitioned by (column2 ...)` duplicated column2,
-- which Hive rejects; `columns4` was a typo for column4).
partitioned by (column3 string, column4 string)
row format delimited
fields terminated by ','
stored as textfile
;

-- Loading into a partitioned table requires a PARTITION clause
-- (fill in the real partition values).
load data inpath 'hdfs_path/data.txt' into table tablename partition (column3='v3', column4='v4');


-- Append new columns to an existing table's schema.
ALTER TABLE table_name ADD COLUMNS (column1 INT, column2 INT);
 
-- Print column names as a header row in Hive CLI query output.
set hive.cli.print.header=true;
 
-- Day of week as 0~6. Original was missing the closing parenthesis.
-- NOTE(review): 1920-01-01 was a Thursday; with the -3 offset this
-- appears to map 0=Sunday ... 6=Saturday — verify against known dates.
pmod(datediff(date, '1920-01-01') - 3, 7)
 
-- Show full table details (columns, storage format, location, stats).
DESCRIBE FORMATTED table;
 
--生成success文件
hadoop fs -touchz hdfs/_SUCCESS

-- Drop one partition; IF EXISTS suppresses the error when it is absent.
alter table table_name drop if exists partition (dt='2008-08-08');

-- Hive: merge small files on the Map input side.

-- Max input size per Map task; determines how many files remain after merging.
set mapred.max.split.size = 256000000;
-- Minimum split size per node; decides whether files on several data nodes get merged.
set mapred.min.split.size.per.node = 100000000;
-- Minimum split size per rack; decides whether files across racks get merged.
set mapred.min.split.size.per.rack = 100000000;
-- Combine small files into larger splits before the Map phase runs.
set hive.input.format = org.apache.hadoop.hive.ql.io.CombineHiveInputFormat;
-- NOTE(review): mapred.* are the legacy MR1 key names; on Hadoop 2+ the
-- mapreduce.input.fileinputformat.split.* equivalents apply — confirm cluster version.
 
 
  
  

  

posted on 2018-06-12 10:49  包包大人_silov  阅读(239)  评论(0)    收藏  举报