use ods_tm;
-- Session-level Hadoop/Hive settings for ODS batch jobs.
-- Submit to the 'ods' scheduler queue (old and new property names for compatibility).
set mapred.queue.name= ods;
set mapred.job.queue.name= ods;
set hive.fetch.task.conversion=more; --simple select without job
-- Allow dynamic-partition inserts without requiring at least one static partition key.
set hive.exec.dynamic.partition=true;
set hive.exec.dynamic.partition.mode=nonstrict;
set hive.cli.print.header=true;
-- Cap reducer count; final output uncompressed, intermediate stages compressed.
set hive.exec.reducers.max=200;
set hive.exec.compress.output=false;
set hive.exec.compress.intermediate=true;
-- ~512 MB split sizes to limit mapper count on large inputs.
set mapred.max.split.size=512000000;
set mapred.min.split.size.per.node=512000000;
set mapred.min.split.size.per.rack=512000000;
-- ~530 MB of input per reducer.
set hive.exec.reducers.bytes.per.reducer=530000000;
-- Convert joins with small tables to map-side joins; run independent stages in parallel.
set hive.auto.convert.join=true;
set hive.exec.parallel=true;
set hive.exec.parallel.thread.number=16;
-- Catalog inspection helpers.
show functions; --list every function registered in this session
show tables like '*tm_*'; --list tables whose name contains 'tm_'
desc function to_date; --function description
desc formatted tablename; --show table meta
--create table
-- Managed user dimension table; plain-text storage with Hive's default
-- field delimiter (\001 / Ctrl-A). IF NOT EXISTS makes the script re-runnable.
create table if not exists tm_user
(
userid double --surrogate key; NOTE(review): double risks precision loss on large ids, consider bigint
,usercode string
,username string
)
row format delimited fields terminated by '\001';
-- External customer dimension table over a fixed HDFS location: dropping
-- the table keeps the underlying data files. Partitioned by month / day /
-- source system. IF NOT EXISTS makes the script re-runnable.
create external table if not exists tm_cust
(
custid double comment '自增序号'
,custcode string comment '客户编码'
,custname string comment '客户名称'
,load_dt string comment '加载时间'
)comment '客户维表'
partitioned by(inc_month string,inc_day string,source_flag string)
row format delimited fields terminated by '\001'
location '/ods/tm/tm_cust';
--insert
-- Mixed static/dynamic partition insert: inc_month and inc_day are fixed,
-- source_flag is taken from the last select column (the literal '1').
-- Requires hive.exec.dynamic.partition.mode=nonstrict (set above).
-- NOTE(review): reads from and writes to the same table (tm_cust) —
-- confirm this self-insert into a new partition is intentional.
insert into table tm_cust partition(inc_month='201601',inc_day='15',source_flag)
select
custid
,custcode
,custname
,load_dt
,'1'
from tm_cust;
--col->row
-- Unpivot three cleaned columns into (key, value) rows via explode(map(...)).
-- regexp_replace strips every character outside the CJK range (\u4E00-\u9FFF)
-- and selected printable ASCII ranges; c3 normalizes the transaction date to
-- 'yyyy-MM-dd HH:mm:ss'. Added the terminating ';' so the statement is
-- delimited when the file is run with hive -f.
select k,v
from
(
select
regexp_replace(col1,'[^\\\\u4E00-\\\\u9FFF\\\\x20-\\\\x2e\\\\x30-\\\\x5b\\\\x5d-\\\\x7b\\\\x7d\\\\x7e]','') c1,
regexp_replace(col2,'[^\\\\u4E00-\\\\u9FFF\\\\x20-\\\\x2e\\\\x30-\\\\x5b\\\\x5d-\\\\x7b\\\\x7d\\\\x7e]','') c2,
from_unixtime(unix_timestamp(TRANSACTIONDATE),'yyyy-MM-dd HH:mm:ss') c3
from tablename
) t
lateral view explode (map('c1',c1,'c2',c2,'c3',c3)) t2 as k, v;
--python command1
#!/usr/bin/python
#coding=utf-8
import sys
import datetime
import time
#datetime to string
def timestamp_toString(stamp):
return time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(stamp))
for line in sys.stdin:
print timestamp_toString(float(line))
--python command2
#!/usr/bin/python
#coding=utf-8
# Hive TRANSFORM helper: append an '@' marker to every input line.
import sys

for line in sys.stdin:
    line = line.strip()
    # print() is valid on both Python 2 and 3 for a single argument
    # (the original bare `print x` statement is a syntax error on Python 3).
    print(line + '@')
--add python file to hdfs
-- Ship the script to every task node so TRANSFORM can invoke it.
add file /pyscript/udf.py;
--use py file
-- Pipe load_tm through the script one row at a time.
-- NOTE(review): 'dual' is not a built-in Hive table — presumably a one-row
-- helper table exists in this warehouse; verify before running.
select transform(load_tm) using 'python udf.py' as (load_tm) from dual;
--export to local filesystem
-- Writes tab-delimited files under /home/data/tm_cust; the directory
-- contents are replaced (overwrite).
insert overwrite local directory '/home/data/tm_cust'
row format delimited
fields terminated by '\t'
select * from tm_cust;
--export to hdfs
-- No row format clause, so Hive's default \001 delimiter is used;
-- the target HDFS directory is replaced.
insert overwrite directory '/user/result/tm_cust' select * from tm_cust;
--linux cmd --export data
hive -e 'use ods; select * from ods.tm_user limit 10;' > /tmp/tm_user.csv
--linux cmd --execute sql file
hive -hivevar parm1=${val1} -hivevar parm2=${val2} -f "p_sync_tm_user.sql" &
--linux cmd --kill job
/app/hadoop/bin/hadoop job -kill job_1452352207225_181385
--data input hive
load data local inpath '/tmp/1.txt' overwrite into table tmp_import;
--linux vim cmd --replace all ',' to '\001'
:% s/,/ctrl+a/g
--linux cmd --sum
hive -Sv -e "set hive.fetch.task.conversion=more;select 1 from tb limit 10;"|awk '{sum+=$1} END {print sum}'
--linux cmd --print first row
hive -Sv -e "set hive.fetch.task.conversion=more;select 1 from tb limit 10;"|sed -n '1p'