增存量对比(1)变式

-- 函数在fin_dw下
use fin_dw;

--将过去第7天的数据插入到历史分区
insert into table fin_dw_part.app_api_sec_bond_party_di partition (dt,data_status)
select  serial_no,
		bond_compre_code,
		announ_date,
		party_type_code,
		party_type,
		party_code,
		party_full_name,
		party_short_name,
		leg_rep,
		ent_addr,
		ent_url,
		contact_per,
		contact_tel,
		fax,
		email,
		use_start_date,
		use_end_date,
        update_date,
        dw_ins_date,
        'his' as dt,
        data_status
 from fin_dw_part.app_api_sec_bond_party_di
 where dt >=concat(from_unixtime(unix_timestamp(days_sub(now(), 7)), 'yyyyMMdd'),'0000')
   and dt <=concat(from_unixtime(unix_timestamp(days_sub(now(), 7)), 'yyyyMMdd'),'2359');

--删除过去第7天的数据
alter table fin_dw_part.app_api_sec_bond_party_di drop partition(dt between concat(from_unixtime(unix_timestamp(days_sub(now(),7)),'yyyyMMdd'),'0000') and concat(from_unixtime(unix_timestamp(days_sub(now(),7)),'yyyyMMdd'),'2359'));

--将新增数据插入到目标表
insert overwrite fin_dw_part.app_api_sec_bond_party_di partition (dt,data_status)
select  getmd5_v2(concat(nvl(a.bond_compre_code,''),nvl(a.party_type_code,''),nvl(a.party_code,''),nvl(a.use_start_date,''))) as serial_no,
		a.bond_compre_code,
		a.announ_date,
		a.party_type_code,
		a.party_type,
		a.party_code,
		a.party_full_name,
		a.party_short_name,
		a.leg_rep,
		a.ent_addr,
		a.ent_url,
		a.contact_per,
		a.contact_tel,
		a.fax,
		a.email,
		a.use_start_date,
		a.use_end_date,
        a.update_date,
        from_unixtime(unix_timestamp(now())) as dw_ins_date,
        from_unixtime(unix_timestamp(now()),'yyyyMMddHHmm') as dt,
        case when a.data_status='1' then 'D'
             when b.bond_compre_code is null then 'I'
             else 'U' end as data_status
from (
        select * 
        from fin_dw_part.dwd_sec_bond_party_dd --该表数据为180万条左右(数据量不大的,可以不用取近7天的数据)
        where dt = regexp_replace(to_date(subdate(now(), 1)), '-', '')
        --and update_date>=to_timestamp(to_date(subdate(now(), 7)),'yyyy-MM-dd') --为了防止丢数据,拿近7天更新时间的数据进行增存量判断
) a
left join (select * 
           from(select *,
                       row_number() over(partition by bond_compre_code,party_type_code,party_full_name,nvl(str_to_time(use_start_date),'') order by nvl(update_date,'1900-01-01 00:00:00') desc ) as rn 
                from fin_dw_part.app_api_sec_bond_party_di)as tmp 
           where rn=1) b -- 拿取app_api_sec_bond_party_di表中最新数据进行增存量判断
on a.bond_compre_code=b.bond_compre_code and a.party_type_code=b.party_type_code and a.party_full_name=b.party_full_name and nvl(str_to_time(a.use_start_date),'')=nvl(str_to_time(b.use_start_date),'') --use_start_date生产环境格式不同,还可能为空值
where b.bond_compre_code is null -- 没关联上,新数据 
   or (b.bond_compre_code is not null and 
nvl(a.update_date,'') > nvl(b.update_date,'')) -- 关联上了,新增数据
;
posted @ 2022-07-20 20:03  赤兔胭脂小吕布  阅读(33)  评论(0)    收藏  举报