hive备忘 (Hive notes / cheat sheet)

use ods_tm;

-- Job scheduling: submit to the 'ods' queue.
-- NOTE(review): removed the space after '=' — older Hive SetProcessor does not
-- trim whitespace, so "set ...= ods;" could register the queue name as " ods".
set mapred.queue.name=ods;
set mapred.job.queue.name=ods;
set hive.fetch.task.conversion=more;            --simple selects run without an MR job
set hive.exec.dynamic.partition=true;           --allow dynamic partition inserts
set hive.exec.dynamic.partition.mode=nonstrict; --no static partition column required
set hive.cli.print.header=true;                 --print column headers in CLI output
set hive.exec.reducers.max=200;
set hive.exec.compress.output=false;            --final job output left uncompressed
set hive.exec.compress.intermediate=true;       --compress map->reduce intermediate data
set mapred.max.split.size=512000000;            --~512 MB splits
set mapred.min.split.size.per.node=512000000;
set mapred.min.split.size.per.rack=512000000;
set hive.exec.reducers.bytes.per.reducer=530000000;
set hive.auto.convert.join=true;                --convert small-table joins to map joins
set hive.exec.parallel=true;                    --run independent stages in parallel
set hive.exec.parallel.thread.number=16;



-- List all built-in and registered functions.
show functions;
-- Glob-style table search; '*' matches any character sequence.
show tables like '*tm_*';

desc function to_date;    --function description
desc formatted tablename; --show table meta (location, serde, stats); 'tablename' is a placeholder


--create table
-- User dimension table, '\001'-delimited text (Hive's default delimiter).
-- NOTE(review): userid changed from DOUBLE to BIGINT — floating point is
-- unsafe for identifiers (precision loss, unreliable equality in joins).
-- IF NOT EXISTS makes the script re-runnable.
create table if not exists tm_user
(
userid             bigint
,usercode          string
,username          string
)
row format delimited fields terminated by '\001';

-- Customer dimension. EXTERNAL: data files live under /ods/tm/tm_cust and are
-- NOT deleted when the table is dropped. IF NOT EXISTS makes this re-runnable.
-- NOTE(review): custid is DOUBLE; BIGINT would be safer for an exact id, but
-- the type is kept as-is to stay readable against existing data files.
create external table if not exists tm_cust
(
custid             double comment '自增序号'
,custcode          string comment '客户编码'
,custname          string comment '客户名称'
,load_dt           string comment '加载时间'
)comment '客户维表'
partitioned by(inc_month string,inc_day string,source_flag string)
row format delimited fields terminated by '\001'
location  '/ods/tm/tm_cust';

-- Mixed static/dynamic partition insert: inc_month and inc_day are fixed,
-- while source_flag is taken from the last selected column ('1').
-- Dynamic partition columns bind by position, so the alias is documentation only.
insert into table tm_cust partition(inc_month='201601',inc_day='15',source_flag)
select
    custid
   ,custcode
   ,custname
   ,load_dt
   ,'1' as source_flag
from tm_cust;

--col->row: explode a map of (column-name -> cleaned value) so every source
--row yields one (k, v) output row per column.
select k,v
from
(
  select
  -- Keep CJK ideographs (U+4E00..U+9FFF) and printable ASCII, drop the rest.
  -- NOTE(review): inside Hive single quotes a Java-regex escape needs TWO
  -- backslashes ('\\u4E00'); the original quadruple backslashes matched a
  -- literal '\' followed by 'u4E00' instead of the unicode range.
  regexp_replace(col1,'[^\\u4E00-\\u9FFF\\x20-\\x2e\\x30-\\x5b\\x5d-\\x7b\\x7d\\x7e]','') c1,
  regexp_replace(col2,'[^\\u4E00-\\u9FFF\\x20-\\x2e\\x30-\\x5b\\x5d-\\x7b\\x7d\\x7e]','') c2,
  from_unixtime(unix_timestamp(TRANSACTIONDATE),'yyyy-MM-dd HH:mm:ss') c3
  from tablename
) t
lateral view explode (map('c1',c1,'c2',c2,'c3',c3)) t2 as k, v;  --semicolon was missing

--python command1
#!/usr/bin/python
#coding=utf-8
# Hive TRANSFORM helper: read one Unix epoch timestamp (seconds) per line from
# stdin and print it as a human-readable local-time string.
import sys
import time


def timestamp_toString(stamp):
    """Format a numeric Unix timestamp as 'YYYY-mm-dd HH:MM:SS' in local time."""
    return time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(stamp))


# Guarded so the module is import-safe; the loop only runs as a script.
if __name__ == '__main__':
    for line in sys.stdin:
        # float() tolerates the trailing newline; print(x) with a single
        # argument behaves identically on Python 2 and 3.
        print(timestamp_toString(float(line)))

--python command2
#!/usr/bin/python
#coding=utf-8
# Hive TRANSFORM helper: append an '@' marker to every stdin line.
import sys


def tag_line(line):
    """Strip surrounding whitespace from one input line and append '@'."""
    return line.strip() + '@'


# Guarded so the module is import-safe; the loop only runs as a script.
if __name__ == '__main__':
    for line in sys.stdin:
        # print(x) with a single argument behaves identically on Python 2 and 3.
        print(tag_line(line))
    
--add a LOCAL python script to the session resources (Hadoop distributed cache) — not HDFS
add file /pyscript/udf.py;
--stream each row through the script; output columns are re-declared in the AS clause
--NOTE(review): 'dual' is not a Hive built-in table — presumably created beforehand in this cluster
select transform(load_tm) using 'python udf.py' as (load_tm) from dual;


--export to local filesystem (OVERWRITE replaces the target directory's contents)
insert overwrite local directory '/home/data/tm_cust'
row format delimited
fields terminated by '\t'
select * from tm_cust;

--export to hdfs (directory is overwritten; fields use the default '\001' delimiter)
insert overwrite directory '/user/result/tm_cust' select * from tm_cust;

--linux cmd --export query output to a file (tab-delimited text, despite the .csv name)
hive -e 'use ods; select * from ods.tm_user limit 10;' > /tmp/tm_user.csv

--linux cmd --run a sql file in the background, passing variables as ${hivevar:parm1} etc.
hive -hivevar parm1=${val1} -hivevar parm2=${val2} -f "p_sync_tm_user.sql" &

--linux cmd --kill a running MapReduce job by id
/app/hadoop/bin/hadoop job  -kill job_1452352207225_181385

--load a local file into a Hive table (OVERWRITE replaces existing data)
load data local inpath '/tmp/1.txt' overwrite into table tmp_import;
--vim: replace every ',' with the '\001' delimiter ("ctrl+a" is typed as Ctrl-V then Ctrl-A)
:% s/,/ctrl+a/g

--linux cmd --sum the single output column with awk (-S silent, -v verbose)
hive -Sv -e "set hive.fetch.task.conversion=more;select 1 from tb limit 10;"|awk '{sum+=$1} END {print sum}'
--linux cmd --print only the first output row
hive -Sv -e "set hive.fetch.task.conversion=more;select 1 from tb limit 10;"|sed -n '1p'

 

posted @ 2016-07-14 16:35  czlblog  阅读(160)  评论(0)    收藏  举报