shell 脚本 中的 函数、窗口函数中order by多字段时desc的用法、关于sql中的空值null及处理、增存量对比(1)

shell 脚本 中的 函数

#!/bin/bash				 # 指定了shell脚本解释器的路径,而且这个指定路径只能放在文件的第一行。第一行写错或者不写时,系统会有一个默认的解释器进行解释。
db_cr_hub1_ip="`get_database_info mysql.cr_hub1.ip`"
db_cr_hub1_database="`get_database_info mysql.cr_hub1.database`"
db_cr_hub1_username="`get_database_info mysql.cr_hub1.username`"
db_cr_hub1_password="`get_database_info mysql.cr_hub1.password`"
db_amarmonitor_ip="`get_database_info other.amarmonitor`"
impala_username="`get_database_info impala.username`"
impalad_host=`choose_impalad`
get_col(){					 # shell中的函数
local s=$1                   # $1 该脚本的第一个参数
local col_num=$2			 # local 声明局部变量
local col_str=$3

let "col_num--"				 # let 命令和双小括号 (( )) 的用法是类似的,它们都是用来对整数进行运算,对col_num进行--操作

local ss=${s#*${col_str}}

if [[ ${col_num} -eq 0 ]]	 # if语句
then
  echo ${ss%%${col_str}*}
else
  local sss=${ss#*${col_str}}
  get_col "$sss" $col_num "$col_str"
fi
}
…………

窗口函数中order by多字段时desc的用法

row_number() over(partition by t1.companycode,t1.mainunderwriter,t1.accountingfirm,t1.legaladvisor,t1.status order by t1.enddate,t1.eutime desc) as enddate_number  --此时的desc仅对eutime生效

-- 想要desc对enddate也生效
row_number() over(partition by t1.companycode,t1.mainunderwriter,t1.accountingfirm,t1.legaladvisor,t1.status order by t1.enddate desc,t1.eutime desc) as enddate_number

关于sql中的空值null及处理

对于空值字段需要进行预处理 -- null不支持大小/相等判断

关于sql中的空值null及处理

增存量对比(1)

use fin_dw;

--将过去第7天的数据插入到历史分区
insert into table app_api_sec_pre_ipo_situation_di partition (dt,data_status) -- qpp层以天和数据状态(新增\更新\失效)分区
select 
		ent_name,
		ent_code,
		addr,
		ind,
		guar_inst,
		guar_per,
		acct_firm,
		sign_acct,
		lawyer_firm,
		sign_lawyer,
		list_status,
		is_submit_fin_rpt,
		end_date,
		pre_list_market,
		remark1,
		fin_type,
		remark2,
		update_date,
		dw_ins_date,
		query_org_id,
	    'his' as dt,
	    data_status
 from app_api_sec_pre_ipo_situation_di
 where dt >=concat(from_unixtime(unix_timestamp(days_sub(now(), 7)), 'yyyyMMdd'),'0000') -- 过去第七天0点
   and dt <=concat(from_unixtime(unix_timestamp(days_sub(now(), 7)), 'yyyyMMdd'),'2359');-- 过去第七天23:59

--删除过去第7天的数据
alter table app_api_sec_pre_ipo_situation_di drop partition(dt between concat(from_unixtime(unix_timestamp(days_sub(now(),7)),'yyyyMMdd'),'0000') and concat(from_unixtime(unix_timestamp(days_sub(now(),7)),'yyyyMMdd'),'2359')); -- 删除过去第七天的分区

--拿目标表中最新数据插入临时表 
insert overwrite table t_app_api_sec_pre_ipo_situation_di_01
select tmp.ent_name,tmp.ent_code,tmp.guar_inst,tmp.acct_firm,tmp.lawyer_firm,tmp.list_status,tmp.update_date,tmp.query_org_id
from (select ent_name,ent_code,guar_inst,acct_firm,lawyer_firm,list_status,update_date,query_org_id,
             row_number() over(partition by ent_code,guar_inst,acct_firm,lawyer_firm,list_status,query_org_id order by nvl(update_date,'1990-01-01') desc)  as rn
      from app_api_sec_pre_ipo_situation_di ) tmp 
where tmp.rn=1; -- 将表中最新的数据插入临时表

compute stats t_app_api_sec_pre_ipo_situation_di_01; -- 收集卷信息, 表列和分区的数据分布情况;收集的信息存储在元数据库中,用于优化Impala查询

--将新增数据插入到目标表
insert overwrite app_api_sec_pre_ipo_situation_di partition (dt,data_status)
select 	a.ent_name,
		a.ent_code,
		a.addr,
		a.ind,
		a.guar_inst,
		a.guar_per,
		a.acct_firm,
		a.sign_acct,
		a.lawyer_firm,
		a.sign_lawyer,
		a.list_status,
		a.is_submit_fin_rpt,
		a.end_date,
		a.pre_list_market,
		a.remark1,
		a.fin_type,
		a.remark2,
		a.update_date,
		from_unixtime(unix_timestamp(now())) as dw_ins_date,
		a.bankid,
	    from_unixtime(unix_timestamp(now()),'yyyyMMddHHmm') as dt,
	    case when a.data_status='1' then 'D'
			 when b.ent_name is null then 'I'
			 when b.ent_name is not null then 'U' end as data_status -- 判断数据状态
from (
	select a1.*,a2.bankid
	from(
		select * 
		from dwd_sec_pre_ipo_situation_dd
		where dt = regexp_replace(to_date(subdate(now(), 1)), '-', '') -- 昨天的分区
		and update_date>=to_timestamp(to_date(subdate(now(), 3)),'yyyy-MM-dd') --为了防止丢数据,拿近三天更新时间的数据进行增存量判断
	) a1 cross join (
			select distinct bankid
			from fin_dw.interface_account_info where bankid = 'zhonghangjiangsuBANK'
	) a2
) a
left join t_app_api_sec_pre_ipo_situation_di_01 b
on a.ent_code=b.ent_code and a.guar_inst=b.guar_inst and a.acct_firm=b.acct_firm and a.lawyer_firm=b.lawyer_firm and a.list_status=b.list_status and a.bankid = b.query_org_id  -- 按照主键关联,让数据一一对应
where b.ent_name is null or (b.ent_name is not null and a.update_date > b.update_date); -- 过滤新增数据或更新数据
posted @ 2022-06-28 12:53  赤兔胭脂小吕布  阅读(219)  评论(0)    收藏  举报