oceanbase 数据库SQL优化 (把你的脑袋当成CBO)

OB一哥们找我优化条SQL,反馈在OceanBase存储过程执行时间很慢,需要626秒才能出结果,安排。
INSERT INTO insurance_stat_sx (id, stat_date, cal_num, underwrite_num, veh_num, effect_num, effect_money, unit_code, life_agent_id, life_agent_name, sx_unit_code, sx_unit_name, sx_dept_group_code, sx_dept_group_name, sx_branch_code, sx_branch_name, unit_name, dept_group_code, dept_group_name, dept_code, dept_name, section_code, section_name, client_type, app_type) SELECT t_seq_common.nextval AS id, '2023-05-15', cal_num, underwrite_num, veh_num, effect_num, effect_money, unit_code, life_agent_id, life_agent_name, sx_unit_code, sx_unit_name, sx_dept_group_code, sx_dept_group_name, sx_branch_code, sx_branch_name, unit_name, dept_group_code, dept_group_name, dept_code, dept_name, section_code, section_name, client_type, app_type FROM (SELECT SUM(cal_num) AS cal_num, SUM(underwrite_num) AS underwrite_num, SUM(veh_num) AS veh_num, SUM(effect_num) AS effect_num, SUM(effect_money) AS effect_money, unit_code, life_agent_id, life_agent_name, sx_unit_code, sx_unit_name, sx_dept_group_code, sx_dept_group_name, sx_branch_code, sx_branch_name, unit_name, dept_group_code, dept_group_name, dept_code, dept_name, section_code, section_name, client_type, app_type FROM (SELECT log.unit_code, log.life_agent_id, log.life_agent_name, log.sx_unit_code, log.sx_unit_name, log.sx_dept_group_code, log.sx_dept_group_name, log.sx_branch_code, log.sx_branch_name, log.unit_name, log.dept_group_code, log.dept_group_name, log.dept_code, log.dept_name, log.section_code, log.section_name, SUM(CASE WHEN log.oper_type = 2 THEN 1 ELSE 0 END) cal_num, SUM(CASE WHEN log.oper_type = 3 THEN 1 ELSE 0 END) underwrite_num, COUNT(DISTINCT(registration_number)) veh_num, 0 effect_num, 0 effect_money, log.client_type, log.app_type FROM AAAAA log WHERE log.life_agent_id IS NOT NULL AND log.create_time >= TO_DATE('2023-05-15', 'yyyy-mm-dd') AND log.create_time < TO_DATE('2023-05-16', 'yyyy-mm-dd') AND log.app_type IS NOT NULL AND log.client_type IS NOT NULL GROUP BY log.unit_code, log.life_agent_id, log.life_agent_name, log.sx_unit_code, log.sx_unit_name, log.sx_dept_group_code, log.sx_dept_group_name, log.sx_branch_code, log.sx_branch_name, log.unit_name, log.dept_group_code, log.dept_group_name, log.dept_code, log.dept_name, log.section_code, log.section_name, log.client_type, log.app_type UNION ALL SELECT log.unit_code, log.life_agent_id, log.life_agent_name, log.sx_unit_code, log.sx_unit_name, log.sx_dept_group_code, log.sx_dept_group_name, log.sx_branch_code, log.sx_branch_name, log.unit_name, log.dept_group_code, log.dept_group_name, log.dept_code, log.dept_name, log.section_code, log.section_name, 0 cal_num, 0 underwrite_num, 0 veh_num, COUNT(p.BBBB_pk) effect_num, SUM(NVL(po.underwritten_premium, 0)) effect_money, log.client_type, log.app_type FROM BBBB p, CCCC v, DDDD pr, EEEE po, (SELECT unit_code, policy_id, TO_CHAR(create_time, 'yyyy-mm-dd') create_time, life_agent_id, life_agent_name, sx_unit_code, sx_unit_name, sx_dept_group_code, sx_dept_group_name, sx_branch_code, sx_branch_name, unit_name, dept_group_code, dept_group_name, dept_code, dept_name, section_code, section_name, client_type, app_type FROM AAAAA WHERE policy_status = '3' AND oper_type = 7 AND life_agent_id IS NOT NULL AND app_type IS NOT NULL AND client_type IS NOT NULL GROUP BY unit_code, policy_id, TO_CHAR(create_time, 'yyyy-mm-dd'), life_agent_id, life_agent_name, sx_unit_code, sx_unit_name, sx_dept_group_code, sx_dept_group_name, sx_branch_code, sx_branch_name, unit_name, dept_group_code, dept_group_name, dept_code, dept_name, section_code, section_name, client_type, app_type) log WHERE p.BBBB_pk = v.BBBB_fk AND v.CCCC_pk = pr.CCCC_fk AND pr.DDDD_pk = po.DDDD_fk AND p.policy_status = '3' AND log.policy_id = p.BBBB_pk AND log.create_time >= '2023-05-15' AND log.create_time < '2023-05-16' GROUP BY log.unit_code, log.life_agent_id, log.life_agent_name, log.sx_unit_code, log.sx_unit_name, log.sx_dept_group_code, log.sx_dept_group_name, log.sx_branch_code, log.sx_branch_name, log.unit_name, log.dept_group_code, log.dept_group_name, log.dept_code, log.dept_name, log.section_code, log.section_name, log.client_type, log.app_type) GROUP BY unit_code, life_agent_id, life_agent_name, sx_unit_code, sx_unit_name, sx_dept_group_code, sx_dept_group_name, sx_branch_code, sx_branch_name, unit_name, dept_group_code, dept_group_name, dept_code, dept_name, section_code, section_name, client_type, app_type) tmp;
SQL整体返回0行数据,所以insert into 也是0行,insurance_log 表 12亿行数据。
当时OB哥们给到我的时候我再忙其他事情,没有要执行计划,粗略扫了一下SQL大致的写法。
得知了 AAAAA 表 12 亿行数据以后,让他加个并行hint 试试看速度。
下面SQL除了加了并行 HINT ,后面无任何修改。
select /*+ USE_PX PARALLEL(8)*/ t_seq_common.nextval as id, --to_char('2023-05-15', 'yyyy-mm-dd') as stat_date, '2023-05-15', cal_num, underwrite_num, veh_num, effect_num, effect_money, unit_code, life_agent_id, life_agent_name... 省略后面SQL

并行 hint 加完以后只需要 281s 就能出结果,当时我也忙其他事情,没继续优化下去。
但是这哥们领导不依不饶,还得继续让他优化,没办法只能帮忙仔细看看了。

缓慢节点:
SELECT log.unit_code, log.life_agent_id, log.life_agent_name, log.sx_unit_code, log.sx_unit_name, log.sx_dept_group_code, log.sx_dept_group_name, log.sx_branch_code, log.sx_branch_name, log.unit_name, log.dept_group_code, log.dept_group_name, log.dept_code, log.dept_name, log.section_code, log.section_name, SUM(CASE WHEN log.oper_type = 2 THEN 1 ELSE 0 END) cal_num, SUM(CASE WHEN log.oper_type = 3 THEN 1 ELSE 0 END) underwrite_num, COUNT(DISTINCT(registration_number)) veh_num, 0 effect_num, 0 effect_money, log.client_type, log.app_type FROM AAAAA log WHERE log.life_agent_id IS NOT NULL AND log.create_time >= TO_DATE('2023-05-15', 'yyyy-mm-dd') AND log.create_time < TO_DATE('2023-05-16', 'yyyy-mm-dd') AND log.app_type IS NOT NULL AND log.client_type IS NOT NULL GROUP BY log.unit_code, log.life_agent_id, log.life_agent_name, log.sx_unit_code, log.sx_unit_name, log.sx_dept_group_code, log.sx_dept_group_name, log.sx_branch_code, log.sx_branch_name, log.unit_name, log.dept_group_code, log.dept_group_name, log.dept_code, log.dept_name, log.section_code, log.section_name, log.client_type, log.app_type UNION ALL SELECT log.unit_code, log.life_agent_id, log.life_agent_name, log.sx_unit_code, log.sx_unit_name, log.sx_dept_group_code, log.sx_dept_group_name, log.sx_branch_code, log.sx_branch_name, log.unit_name, log.dept_group_code, log.dept_group_name, log.dept_code, log.dept_name, log.section_code, log.section_name, 0 cal_num, 0 underwrite_num, 0 veh_num, COUNT(p.BBBB_pk) effect_num, SUM(NVL(po.underwritten_premium, 0)) effect_money, log.client_type, log.app_type FROM BBBB p, CCCC v, DDDD pr, EEEE po, (SELECT unit_code, policy_id, TO_CHAR(create_time, 'yyyy-mm-dd') create_time, life_agent_id, life_agent_name, sx_unit_code, sx_unit_name, sx_dept_group_code, sx_dept_group_name, sx_branch_code, sx_branch_name, unit_name, dept_group_code, dept_group_name, dept_code, dept_name, section_code, section_name, client_type, app_type FROM AAAAA WHERE policy_status = '3' AND oper_type = 7 AND life_agent_id IS NOT NULL AND app_type IS NOT NULL AND client_type IS NOT NULL GROUP BY unit_code, policy_id, TO_CHAR(create_time, 'yyyy-mm-dd'), life_agent_id, life_agent_name, sx_unit_code, sx_unit_name, sx_dept_group_code, sx_dept_group_name, sx_branch_code, sx_branch_name, unit_name, dept_group_code, dept_group_name, dept_code, dept_name, section_code, section_name, client_type, app_type) log WHERE p.BBBB_pk = v.BBBB_fk AND v.CCCC_pk = pr.CCCC_fk AND pr.DDDD_pk = po.DDDD_fk AND p.policy_status = '3' AND log.policy_id = p.BBBB_pk AND log.create_time >= '2023-05-15' AND log.create_time < '2023-05-16' GROUP BY log.unit_code, log.life_agent_id, log.life_agent_name, log.sx_unit_code, log.sx_unit_name, log.sx_dept_group_code, log.sx_dept_group_name, log.sx_branch_code, log.sx_branch_name, log.unit_name, log.dept_group_code, log.dept_group_name, log.dept_code, log.dept_name, log.section_code, log.section_name, log.client_type, log.app_type;
union all 上面这段SQL AAAAA 表12亿数据,返回0行。
union all 下面这段SQL BBBB p 1.9亿、CCCC v 1.9亿 、DDDD pr 2.7亿、EEEE po 4430万、log 内联视图 2025W,关联后返回0行。
这么大的数据量关联,慢也是正常,但是知道数据量以后就好办了。
SQL改写 + hint 干预方案 :
SELECT t_seq_common.nextval AS id, --to_char('2023-05-15', 'yyyy-mm-dd') as stat_date, '2023-05-15', cal_num, underwrite_num, veh_num, effect_num, effect_money, unit_code, life_agent_id, life_agent_name, sx_unit_code, sx_unit_name, sx_dept_group_code, sx_dept_group_name, sx_branch_code, sx_branch_name, unit_name, dept_group_code, dept_group_name, dept_code, dept_name, section_code, section_name, client_type, app_type FROM (SELECT SUM(cal_num) AS cal_num, SUM(underwrite_num) AS underwrite_num, SUM(veh_num) AS veh_num, SUM(effect_num) AS effect_num, SUM(effect_money) AS effect_money, unit_code, life_agent_id, life_agent_name, sx_unit_code, sx_unit_name, sx_dept_group_code, sx_dept_group_name, sx_branch_code, sx_branch_name, unit_name, dept_group_code, dept_group_name, dept_code, dept_name, section_code, section_name, client_type, app_type FROM (WITH x_log AS ( SELECT /*+ USE_PX PARALLEL(6)*/ * FROM AAAAA log WHERE log.life_agent_id IS NOT NULL AND log.app_type IS NOT NULL AND log.client_type IS NOT NULL AND log.create_time >= to_date('2023-05-15', 'yyyy-mm-dd') AND log.create_time < to_date('2023-05-16', 'yyyy-mm-dd') ) SELECT /*+ USE_PX PARALLEL(4)*/ * log.unit_code, log.life_agent_id, log.life_agent_name, log.sx_unit_code, log.sx_unit_name, log.sx_dept_group_code, log.sx_dept_group_name, log.sx_branch_code, log.sx_branch_name, log.unit_name, log.dept_group_code, log.dept_group_name, log.dept_code, log.dept_name, log.section_code, log.section_name, SUM(CASE WHEN log.oper_type = 2 THEN 1 ELSE 0 END) cal_num, SUM(CASE WHEN log.oper_type = 3 THEN 1 ELSE 0 END) underwrite_num, COUNT(DISTINCT(registration_number)) veh_num, 0 effect_num, 0 effect_money, log.client_type, log.app_type FROM x_log log GROUP BY log.unit_code, log.life_agent_id, log.life_agent_name, log.sx_unit_code, log.sx_unit_name, log.sx_dept_group_code, log.sx_dept_group_name, log.sx_branch_code, log.sx_branch_name, log.unit_name, log.dept_group_code, log.dept_group_name, log.dept_code, log.dept_name, log.section_code, log.section_name, log.client_type, log.app_type UNION ALL SELECT /*+ MERGE(log) USE_PX PARALLEL(5) PQ_DISTRIBUTE(p hash, hash) PQ_DISTRIBUTE(v hash, hash) PQ_DISTRIBUTE(pr hash, hash) PQ_DISTRIBUTE(po hash, hash) PQ_DISTRIBUTE(log hash, hash) */ log.unit_code, log.life_agent_id, log.life_agent_name, log.sx_unit_code, log.sx_unit_name, log.sx_dept_group_code, log.sx_dept_group_name, log.sx_branch_code, log.sx_branch_name, log.unit_name, log.dept_group_code, log.dept_group_name, log.dept_code, log.dept_name, log.section_code, log.section_name, 0 cal_num, 0 underwrite_num, 0 veh_num, COUNT(p.BBBB_pk) effect_num, SUM(nvl(po.underwritten_premium, 0)) effect_money, log.client_type, log.app_type FROM BBBB p, CCCC v, DDDD pr, EEEE po, (SELECT /*+ USE_PX PARALLEL(5)*/ unit_code, policy_id, to_char(create_time, 'yyyy-mm-dd') create_time, life_agent_id, life_agent_name, sx_unit_code, sx_unit_name, sx_dept_group_code, sx_dept_group_name, sx_branch_code, sx_branch_name, unit_name, dept_group_code, dept_group_name, dept_code, dept_name, section_code, section_name, client_type, app_type FROM x_log log WHERE policy_status = '3' AND oper_type = 7 GROUP BY unit_code, policy_id, to_char(create_time, 'yyyy-mm-dd'), life_agent_id, life_agent_name, sx_unit_code, sx_unit_name, sx_dept_group_code, sx_dept_group_name, sx_branch_code, sx_branch_name, unit_name, dept_group_code, dept_group_name, dept_code, dept_name, section_code, section_name, client_type, app_type) log WHERE p.BBBB_pk = v.BBBB_fk AND v.CCCC_pk = pr.CCCC_fk AND pr.DDDD_pk = po.DDDD_fk AND p.policy_status = '3' AND log.policy_id = p.BBBB_pk GROUP BY log.unit_code, log.life_agent_id, log.life_agent_name, log.sx_unit_code, log.sx_unit_name, log.sx_dept_group_code, log.sx_dept_group_name, log.sx_branch_code, log.sx_branch_name, log.unit_name, log.dept_group_code, log.dept_group_name, log.dept_code, log.dept_name, log.section_code, log.section_name, log.client_type, log.app_type) GROUP BY unit_code, life_agent_id, life_agent_name, sx_unit_code, sx_unit_name, sx_dept_group_code, sx_dept_group_name, sx_branch_code, sx_branch_name, unit_name, dept_group_code, dept_group_name, dept_code, dept_name, section_code, section_name, client_type, app_type ) tmp;
最终上面SQL 27s 就能跑出结果。

这个案例从始至终没有看过执行计划 (OB的执行计划我也看不懂,看了也是白看)。
当具备一定优化理论知识之后,我们可以不看执行计划,直接根据 SQL 写法和表的数据量来判断是否走 NL 还是 HASH,
然后一直这样进行下去直到 SQL 语句中所有表都关联完毕,如果大家长期采用此方法进行锻炼,久而久之,你自己的脑袋就是 CBO。

浙公网安备 33010602011771号