SQL_数据抽取及校验

1.数据抽取


create or replace procedure P_DATA_EXTRACT_0915 (TABLE_NAME in varchar2,START_DATE IN NUMBER,END_DATE IN NUMBER ) is
-- 作者:付博
-- 用途:用作信用卡回迁项目数据抽取
TABLE_SOURCE NVARCHAR2(100) ;
TABLE_TARGET NVARCHAR2(100) ;
FIELD_NAME NVARCHAR2(100);
MAX_DATE NUMBER;
MIN_DATE NUMBER ;
TOTAL_NUMBER NUMBER;
SQL_T VARCHAR2(32000);
page INTEGER;
EXTRACT_number NUMBER :=2000000 ; --每次抽取量
BEGIN_TIME date;
END_TIME date;
--TEST02 NVARCHAR2(100) :='a';
begin

--TEST



--select max(ETL_DATE)  INTO TEST01 from data_event_oldods ;
--DBMS_OUTPUT.put_line(TEST);
--DBMS_OUTPUT.put_line(TEST01);
--DBMS_OUTPUT.put_line(TEST02);

BEGIN_TIME := SYSDATE;
--1.编写源表和目标表
TABLE_SOURCE := 'DATA_'||TABLE_NAME|| '_OLDODS'; -- 源表
TABLE_TARGET := 'DATA_'||TABLE_NAME|| '_HQ';     -- 新表 别写反了!!!!
DBMS_OUTPUT.put_line('源表和目标表:'||TABLE_SOURCE||'-'||TABLE_TARGET);

--2.获取最大日期/最小日期/总数/抽取量/页数
select   NVL(max(ETL_DATE),0) INTO MAX_DATE from data_event_oldods ;
DBMS_OUTPUT.put_line('最大日期/最小日期:'||MAX_DATE||'-'||MIN_DATE);

select  count(1) INTO TOTAL_NUMBER from data_event_oldods WHERE ETL_DATE BETWEEN   START_DATE  AND   END_DATE  ;
DBMS_OUTPUT.put_line('总数:'||TOTAL_NUMBER );

select ceil(TOTAL_NUMBER/EXTRACT_number) into page from dual;
DBMS_OUTPUT.put_line('每次抽取数/页数:'||EXTRACT_number||'-'||page );



--3.获取抽取字段名
SELECT listagg(to_char(column_name),',') within group(order by column_name) INTO FIELD_NAME
FROM 
(
SELECT COLUMN_NAME FROM  user_tab_columns where table_name =UPPER(TABLE_TARGET)
INTERSECT
SELECT COLUMN_NAME FROM  user_tab_columns where table_name =UPPER(TABLE_SOURCE)
);
DBMS_OUTPUT.put_line('抽取字段:'||FIELD_NAME);

--4.分页抽取(启用并行,少日志)

FOR I IN 1..page LOOP
SQL_T :=
'
 insert into /*+ append */ '||TABLE_TARGET||' nologging ( '||FIELD_NAME||')  
 SELECT /*+paralle(16)*/ '||FIELD_NAME||' 
 FROM
 (SELECT /*+paralle(16)*/ '||FIELD_NAME||' ,RW
 FROM
 (select /*+paralle(16)*/ '||FIELD_NAME||'  ,ROWNUM RW
 from  '||TABLE_SOURCE||'  
 WHERE ETL_DATE BETWEEN  '||START_DATE||' AND  '||END_DATE||' ORDER BY ROWID)
 WHERE RW <='||EXTRACT_number||' *'||i||')
 WHERE RW >'||EXTRACT_number||' * ('||i||'-1)
';

DBMS_OUTPUT.put_line(SQL_T);
execute immediate SQL_T;
commit;
DBMS_OUTPUT.put_line(i||'页抽取成功');

end loop;

END_TIME :=SYSDATE;

--5.返回相关参数(插入多少行,用时多久)

DBMS_OUTPUT.put_line(TOTAL_NUMBER||'行已插入');
DBMS_OUTPUT.put_line('用时'||to_char(END_TIME-BEGIN_TIME));

--6.异常处理(错误数据类型)

EXCEPTION WHEN OTHERS THEN
  
  ROLLBACK;
  
DBMS_OUTPUT.put_line('出现异常');
DBMS_OUTPUT.put_line(DBMS_UTILITY.FORMAT_ERROR_STACK);

  
end P_DATA_EXTRACT_0915;


2.数据校验(待补充)


3.数据抽取并行度测试

--创建随机表
create table test_a as
select sys_guid() TID,dbms_random.random TNUM,dbms_random.string('x',30) Tlet,to_date(2454084+TRUNC(DBMS_RANDOM.VALUE(0,365)),'J') tdate_1 ,to_date(2454084+TRUNC(DBMS_RANDOM.VALUE(0,365)),'J') tdate_2 from dual;

truncate table test_a;
drop table test_a;
select count(1) from test_a order by tid;

--插入99999,13s
insert into test_a
select  sys_guid(),dbms_random.random TNUM,dbms_random.string('x',30) Tletter,to_date(2454084+TRUNC(DBMS_RANDOM.VALUE(0,365)),'J') tdate_1 ,to_date(2454084+TRUNC(DBMS_RANDOM.VALUE(0,365)),'J') tdate_2 from dual
connect by level  <100000;
commit;

--开启并行模式
alter session force parallel dml;
alter session force parallel query;
alter session enable parallel DML;
alter session enable parallel query;


--插入百万条数据,耗时123秒
insert  into test_a
select sys_guid(),dbms_random.random TNUM,dbms_random.string('x',30) Tletter,to_date(2454084+TRUNC(DBMS_RANDOM.VALUE(0,365)),'J') tdate_1 ,to_date(2454084+TRUNC(DBMS_RANDOM.VALUE(0,365)),'J') tdate_2 from dual a
connect by level  <1000000;
commit;

--插入百万条数据,耗时124秒
insert /*+ parallel(bb,8)*/ into test_a bb
select /*+ parallel(aa,8)*/ sys_guid(),dbms_random.random TNUM,dbms_random.string('x',30) Tletter,to_date(2454084+TRUNC(DBMS_RANDOM.VALUE(0,365)),'J') tdate_1 ,to_date(2454084+TRUNC(DBMS_RANDOM.VALUE(0,365)),'J') tdate_2 from dual aa
connect by level  <1000000;
commit;

--在两百万基础上插入百万条数据,耗时198秒
insert /*+ parallel(bb,8)*/ into test_a bb
select /*+ parallel(aa,8)*/ sys_guid(),dbms_random.random TNUM,dbms_random.string('x',30) Tletter,to_date(2454084+TRUNC(DBMS_RANDOM.VALUE(0,365)),'J') tdate_1 ,to_date(2454084+TRUNC(DBMS_RANDOM.VALUE(0,365)),'J') tdate_2 from dual aa
connect by level  <1000000;
commit;

--在两百万基础上插入百万条数据,,并执行alter session force parallel dml;alter session force parallel query;耗时189秒
insert /*+ parallel(bb,8)*/ into test_a bb
select /*+ parallel(aa,8)*/ sys_guid(),dbms_random.random TNUM,dbms_random.string('x',30) Tletter,to_date(2454084+TRUNC(DBMS_RANDOM.VALUE(0,365)),'J') tdate_1 ,to_date(2454084+TRUNC(DBMS_RANDOM.VALUE(0,365)),'J') tdate_2 from dual aa
connect by level  <1000000;
commit;

--在两百万基础上插入百万条数据,耗时198秒
insert /*+ parallel(bb,8) */ into test_a bb
select /*+ parallel(aa,8) */ sys_guid(),dbms_random.random TNUM,dbms_random.string('x',30) Tletter,to_date(2454084+TRUNC(DBMS_RANDOM.VALUE(0,365)),'J') tdate_1 ,to_date(2454084+TRUNC(DBMS_RANDOM.VALUE(0,365)),'J') tdate_2 from dual aa
connect by level  <1000000;
commit;

--在两百万基础上插入百万条数据,耗时198秒
insert /*+append parallel(6)*/ into test_a 
select /*+parallel(6)*/ sys_guid(),dbms_random.random TNUM,dbms_random.string('x',30) Tletter,to_date(2454084+TRUNC(DBMS_RANDOM.VALUE(0,365)),'J') tdate_1 ,to_date(2454084+TRUNC(DBMS_RANDOM.VALUE(0,365)),'J') tdate_2 from dual A
connect by level  <1000000;
commit;

--在两百万基础上插入百万条数据,耗时173秒
insert into test_a 
select /*+ parallel(16)*/ sys_guid(),dbms_random.random TNUM,dbms_random.string('x',30) Tletter,to_date(2454084+TRUNC(DBMS_RANDOM.VALUE(0,365)),'J') tdate_1 ,to_date(2454084+TRUNC(DBMS_RANDOM.VALUE(0,365)),'J') tdate_2 from dual
connect by level  <1000000;
commit;

--在两百万基础上插入百万条数据,耗时184秒
insert into test_a 
select /*+ parallel(12)*/ sys_guid(),dbms_random.random TNUM,dbms_random.string('x',30) Tletter,to_date(2454084+TRUNC(DBMS_RANDOM.VALUE(0,365)),'J') tdate_1 ,to_date(2454084+TRUNC(DBMS_RANDOM.VALUE(0,365)),'J') tdate_2 from dual
connect by level  <1000000;
commit;

--在两百万基础上插入百万条数据,耗时180秒
insert into test_a 
select /*+ parallel(anto)*/ sys_guid(),dbms_random.random TNUM,dbms_random.string('x',30) Tletter,to_date(2454084+TRUNC(DBMS_RANDOM.VALUE(0,365)),'J') tdate_1 ,to_date(2454084+TRUNC(DBMS_RANDOM.VALUE(0,365)),'J') tdate_2 from dual
connect by level  <1000000;
commit;

/*并行度正确开启
第一:开启并行度,下面任意一种都有效
alter session force parallel dml;  开启这个insert/update/delete有效
alter session force parallel query;  开启这个select有效

alter session enable parallel DML;
alter session enable parallel query;
第二:正确的语法
update/delete/insert/create/select 后面/*+ parallel(16) */,没有别名,没有anto,可以空格
insert /*+ append paralled(16)*/ 也有效果 */
如果要加别名的话,就需要给多张表同时加,否则会没有效果,/*+ append paralled(a 16)*/ 或者/*+ append paralled(a,16)*/ 都行

--最高效的语句
insert /*+paralle(a,16)*/ into test_a a
select /*+parallel(b,16)*/ sys_guid(),dbms_random.random TNUM,dbms_random.string('x',30) Tletter,to_date(2454084+TRUNC(DBMS_RANDOM.VALUE(0,365)),'J') tdate_1 ,to_date(2454084+TRUNC(DBMS_RANDOM.VALUE(0,365)),'J') tdate_2 from dual b
connect by level  <2000000;
commit;

posted @ 2022-03-06 19:13  付十一。  阅读(161)  评论(0)    收藏  举报