1.1 ods层的数据(增量数据,将新增和修改的数据导入ods)

sqoop语法是每天将get_time 或者using_time或者used_time为当前分区时间,或者优惠券状态发生改变的数据导入ods:

drop table if exists ods_coupon_use;
create external table ods_coupon_use(
`id` string COMMENT '编号',
`coupon_id` string COMMENT '优惠券 ID',
`user_id` string COMMENT 'userid',
`order_id` string COMMENT 'orderid',
`coupon_status` string COMMENT '优惠券状态',
`get_time` string COMMENT '领取时间',
`using_time` string COMMENT '使用时间(下单)',
`used_time` string COMMENT '使用时间(支付)'
) COMMENT '优惠券领用表'
PARTITIONED BY (`dt` string)
row format delimited fields terminated by '\t'
STORED AS
INPUTFORMAT 'com.hadoop.mapred.DeprecatedLzoTextInputFormat'
OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
location '/warehouse/gmall/ods/ods_coupon_use/';
1.2 分析ods数据
使用到ods表的字段:
coupon_id
user_id
order_id
get_time
using_time
used_time
status

1025优惠券业务数据库中的数据
1,1001, user01, order_001, 2020-10-25 10:00:00, null, null, 1

1026优惠券业务数据库中的数据
1,1001, user01, order_001, 2020-10-25 10:00:00, 2020-10-26 00:01:43, 2020-10-26 00:02:00, 1
2,2001, user10, order_10, 2020-10-26 9:01:00, 2020-10-26 11:20:00, null , 1
3,3001, user30, order_30, 2020-10-26 10:00:00, null, null , 1

1027优惠券业务数据库中的数据
1,1001, user01, order_001, 2020-10-25 10:00:00, 2020-10-26 00:01:43, 2020-10-26 00:02:00 , 0
2,2001, user10, order_10, 2020-10-26 9:01:00, 2020-10-26 11:20:00, 2020-10-27 11:20:00 , 1
3,3001, user30, order_30, 2020-10-26 10:00:00, 2020-10-27 11:20:00, 2020-10-27 13:00:00 , 1
4,4001, user40, order_40, 2020-10-27 08:08:00, null, null, 1
1.3 创建累积型快照事实表
drop table if exists dwd_fact_coupon_use;
create external table dwd_fact_coupon_use(
`id` string COMMENT '编号',
`coupon_id` string COMMENT '优惠券 ID',
`user_id` string COMMENT 'userid',
`order_id` string COMMENT '订单 id',
`coupon_status` string COMMENT '优惠券状态',
`get_time` string COMMENT '领取时间',
`using_time` string COMMENT '使用时间(下单)',
`used_time` string COMMENT '使用时间(支付)'
) COMMENT '优惠券领用事实表'
PARTITIONED BY (`dt` string)
stored as parquet
location '/warehouse/gmall/dwd/dwd_fact_coupon_use/'
tblproperties ("parquet.compression"="lzo");
--dt是按照优惠券领用时间get_time做为分区
1.4 待验证的方式
--使用动态分区 根据get_time进行动态分区,默认以查询出来的最后一列为分区字段
set hive.exec.dynamici.partition=true;
set hive.exec.dynamic.partition.mode=nonstrict;
insert overwrite table dwd_fact_coupon_use partition (dt)
select 
      if(b.id is not null , b.id, a.id) as id, 
      if(b.coupon_id is not  null, b.coupon_id, a.coupon_id) as coupon_id, 
      if(b.user_id is not null, b.user_id, a.user_id) as user_id,
      if(b.order_id is not null, b.order_id, a.order_id) as order_id,
      if(b.using_time is not null, b.using_time, a.using_time) as using_time,
      if(b.used_time is not null, b.used_time, a.used_time) as used_time,
      if(b.flag is not null, b.flag, a.flag) as flag,
	  if(b.get_time is not null, b.get_time, a.get_time) as get_time,
	 from(
      select 
            id, 
		    coupon_id,
		    user_id, 
		    order_id, 
		    get_time, 
		    using_time, 
		    used_time,
		    flag
           from dwd_fact_coupon_use where dt in (
            select 
                  get_time 
				 from ods_coupon_use 
				 where dt = '2020-10-26')  a 
		   full outer join (
		    select 
                  id,
				  coupon_id,
				  user_id, 
				  order_id, 
				  get_time, 
				  using_time, 
				  used_time,
				  flag
                 from ods_coupon_use where dt ='2020-10-26') b 
		   on a.id = b.id
posted on 2020-11-29 18:49  jeasonchen001  阅读(743)  评论(0编辑  收藏  举报