Using Hive with shell scripts to automate a data-loading job

Source: https://blog.csdn.net/weixin_40652340/article/details/78788922

 

[Case] Use a Hive script to load data into a Hive partitioned table.
The hourly log files for one day are laid out as:
access_logs/20170610/2017061000.log
                     2017061001.log
                     2017061002.log
                     ......
                     2017061023.log

Two-level partitioning: day / hour.
crontab + shell handles the automatic scheduling; a sketch of the crontab entry is shown below.
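
A minimal crontab sketch, assuming the loading script built later in this post is saved as /opt/datas/hive_script/load_to_hive.sh (the script path and log file are assumptions, not part of the original post):

# add with `crontab -e`; script path is an assumption
# run at 00:30 every day, once all 24 hourly files of the previous day exist
30 0 * * * /bin/bash /opt/datas/hive_script/load_to_hive.sh >> /opt/datas/hive_script/load_to_hive.cron.log 2>&1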

Create the database:
create database load_hive;

Create the table:
create table load_h(
id              string,
url             string,
referer         string,
keyword         string,
type            string,
guid            string,
pageId          string,
moduleId        string,
linkId          string,
attachedInfo    string,
sessionId       string,
trackerU        string,
trackerType     string,
ip              string,
trackerSrc      string,
cookie          string,
orderCode       string,
trackTime       string,
endUserId       string,
firstLink       string,
sessionViewNo   string,
productId       string,
curMerchantId   string,
provinceId      string,
cityId          string,
fee             string,
edmActivity     string,
edmEmail        string,
edmJobId        string,
ieVersion       string,
platform        string,
internalKeyword string,
resultSum       string,
currentPage     string,
linkPosition    string,
buttonPosition  string
)
partitioned by (date string,hour string)
row format delimited fields terminated by '\t'
stored as textfile;
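
Each day/hour partition ends up as its own directory under the table's location. A quick way to see this, assuming the default warehouse directory /user/hive/warehouse (your hive.metastore.warehouse.dir may differ):

# list the hour partitions created for one day (warehouse path is an assumption)
hdfs dfs -ls /user/hive/warehouse/load_hive.db/load_h/date=20170610/
# expected subdirectories: hour=00, hour=01, ..., hour=23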

--hiveconf passes parameters to Hive as key=value pairs; inside HiveQL they are referenced as ${hiveconf:key}.
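
For example, a value passed with --hiveconf can be read back inside the statement; a small sketch (the key name target_day is made up for illustration, and single quotes keep bash from expanding the ${hiveconf:...} placeholder):

# target_day is a made-up key used only for this illustration
/opt/cdh5/hive-0.13.1-cdh5.3.6/bin/hive --hiveconf target_day=20170610 \
    -e 'select count(*) from load_hive.load_h where `date` = "${hiveconf:target_day}"'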

show partitions load_hive.load_h;  -- check the table's current partitions

 

Run with a shell script: load_to_hive.sh

 

#!/bin/bash
# load yesterday's hourly access logs into the partitioned Hive table

# define the date of yesterday
YESTERDAY=`date -d '-1 days' +%Y%m%d`

# define log dir
ACCESS_LOGS_DIR=/opt/datas/access_logs/$YESTERDAY

# define hive home
HIVE_HOME=/opt/cdh5/hive-0.13.1-cdh5.3.6

# load: one hourly file per day/hour partition
for FILE in `ls $ACCESS_LOGS_DIR`
do
    Day=${FILE:0:8}
    Hour=${FILE:8:2}
    echo "${Day}+${Hour}"
    $HIVE_HOME/bin/hive -e "load data local inpath '$ACCESS_LOGS_DIR/$FILE' into table load_hive.load_h partition(date='${Day}',hour='${Hour}')"
done

$HIVE_HOME/bin/hive -e "show partitions load_hive.load_h"
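
Before handing the script to crontab, it can be run once by hand; a sketch, assuming it was saved as /opt/datas/hive_script/load_to_hive.sh:

# assumed script location
chmod +x /opt/datas/hive_script/load_to_hive.sh
/opt/datas/hive_script/load_to_hive.sh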

 

Run with a shell script plus an external SQL file: load_to_hive_f.sh

 

#!/bin/bash
# load yesterday's hourly access logs through an external SQL file (load.sql)

# define the date of yesterday
YESTERDAY=`date -d '-1 days' +%Y%m%d`

# define log dir
ACCESS_LOGS_DIR=/opt/datas/access_logs/$YESTERDAY

# define hive home
HIVE_HOME=/opt/cdh5/hive-0.13.1-cdh5.3.6

# load: pass the paths and partition values to load.sql via --hiveconf
for FILE in `ls $ACCESS_LOGS_DIR`
do
    Day=${FILE:0:8}
    Hour=${FILE:8:2}
    echo "${Day}+${Hour}"
    $HIVE_HOME/bin/hive --hiveconf log_dir=$ACCESS_LOGS_DIR --hiveconf file_path=$FILE --hiveconf DAY=$Day --hiveconf HOUR=$Hour -f /opt/datas/hive_script/load.sql
done

$HIVE_HOME/bin/hive -e "show partitions load_hive.load_h"

 

SQL file: load.sql

 

load data local inpath '${hiveconf:log_dir}/${hiveconf:file_path}' into table load_hive.load_h partition(date='${hiveconf:DAY}',hour='${hiveconf:HOUR}');
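
To test load.sql on a single hourly file without the loop, the four --hiveconf values can be supplied by hand; a sketch using the sample paths from this post:

# one-off load of the 00 o'clock file of 20170610 (paths as in the example above)
/opt/cdh5/hive-0.13.1-cdh5.3.6/bin/hive \
    --hiveconf log_dir=/opt/datas/access_logs/20170610 \
    --hiveconf file_path=2017061000.log \
    --hiveconf DAY=20170610 \
    --hiveconf HOUR=00 \
    -f /opt/datas/hive_script/load.sql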




 
