-- 函数在fin_dw下
use fin_dw;
--将过去第7天的数据插入到历史分区
insert into table fin_dw_part.app_api_sec_bond_party_di partition (dt,data_status)
select serial_no,
bond_compre_code,
announ_date,
party_type_code,
party_type,
party_code,
party_full_name,
party_short_name,
leg_rep,
ent_addr,
ent_url,
contact_per,
contact_tel,
fax,
email,
use_start_date,
use_end_date,
update_date,
dw_ins_date,
'his' as dt,
data_status
from fin_dw_part.app_api_sec_bond_party_di
where dt >=concat(from_unixtime(unix_timestamp(days_sub(now(), 7)), 'yyyyMMdd'),'0000')
and dt <=concat(from_unixtime(unix_timestamp(days_sub(now(), 7)), 'yyyyMMdd'),'2359');
--删除过去第7天的数据
alter table fin_dw_part.app_api_sec_bond_party_di drop partition(dt between concat(from_unixtime(unix_timestamp(days_sub(now(),7)),'yyyyMMdd'),'0000') and concat(from_unixtime(unix_timestamp(days_sub(now(),7)),'yyyyMMdd'),'2359'));
--将新增数据插入到目标表
insert overwrite fin_dw_part.app_api_sec_bond_party_di partition (dt,data_status)
select getmd5_v2(concat(nvl(a.bond_compre_code,''),nvl(a.party_type_code,''),nvl(a.party_code,''),nvl(a.use_start_date,''))) as serial_no,
a.bond_compre_code,
a.announ_date,
a.party_type_code,
a.party_type,
a.party_code,
a.party_full_name,
a.party_short_name,
a.leg_rep,
a.ent_addr,
a.ent_url,
a.contact_per,
a.contact_tel,
a.fax,
a.email,
a.use_start_date,
a.use_end_date,
a.update_date,
from_unixtime(unix_timestamp(now())) as dw_ins_date,
from_unixtime(unix_timestamp(now()),'yyyyMMddHHmm') as dt,
case when a.data_status='1' then 'D'
when b.bond_compre_code is null then 'I'
else 'U' end as data_status
from (
select *
from fin_dw_part.dwd_sec_bond_party_dd --该表数据为180万条左右(数据量不大的,可以不用取近7天的数据)
where dt = regexp_replace(to_date(subdate(now(), 1)), '-', '')
--and update_date>=to_timestamp(to_date(subdate(now(), 7)),'yyyy-MM-dd') --为了防止丢数据,拿近7天更新时间的数据进行增存量判断
) a
left join (select *
from(select *,
row_number() over(partition by bond_compre_code,party_type_code,party_full_name,nvl(str_to_time(use_start_date),'') order by nvl(update_date,'1900-01-01 00:00:00') desc ) as rn
from fin_dw_part.app_api_sec_bond_party_di)as tmp
where rn=1) b -- 拿取app_api_sec_bond_party_di表中最新数据进行增存量判断
on a.bond_compre_code=b.bond_compre_code and a.party_type_code=b.party_type_code and a.party_full_name=b.party_full_name and nvl(str_to_time(a.use_start_date),'')=nvl(str_to_time(b.use_start_date),'') --use_start_date生产环境格式不同,还可能为空值
where b.bond_compre_code is null -- 没关联上,新数据
or (b.bond_compre_code is not null and
nvl(a.update_date,'') > nvl(b.update_date,'')) -- 关联上了,新增数据
;