[10]sql优化-ads_fineReport.operation_analysis_data
原始SQL 语句
-- 本节点类型适用于MySQL数据源
SET query_timeout = 1000;
insert overwrite ads_fineReport.operation_analysis_data
select distinct
t1.userId,
t1.loginDate,
if(t2.registeredDate is null,'2021-04-21 00:00:00',t2.registeredDate) as registeredDate,
if(seconds_diff(t1.loginDate,if(t2.registeredDate is null,'2021-04-21 00:00:00',t2.registeredDate)) > 600,'1','0') as user_type,
t10.client_type_name,
t3.clueId as b_clueId,
t4.clueId as c_clueId,
t3.clueActivityid,
t4.pageName,
t3.originIdend,
t4_1.origin_name_one as end_originName1,
t4_1.origin_name_two as end_originName2,
t3.originId,
t4_2.origin_name_one as originName1,
t4_2.origin_name_two as originName2,
t11.origindictName,
t9.origindictName,
t3.clueEseeidend,
concat(t5_1.userName,'(',t5_1.eeNo,')') as end_userName,
t5_1.eeNo as end_eeNo,
t5_1.dptId as end_dptId,
t5_1.dptPath as end_dptName,
t3.clueEseeid,
concat(t5_2.userName,'(',t5_2.eeNo,')') as userName,
t5_2.eeNo,
t5_2.dptId,
t5_2.dptPath as dptName,
t3.projectIdend,
t6_1.project_name_two as end_projectName2,
t6_1.project_id_one as end_projectId1,
t6_1.project_name_one as end_projectName1,
t3.projectId,
t6_2.project_name_two as projectName2,
t6_2.project_id_one as projectId1,
t6_2.project_name_one as projectName1,
t3.clueEndname,
if(t4.clueStatus not in ('N','A','O'),'UN',t4.clueStatus) as clueStatus,
t7.act_type,
t7.activityactTypeid,
t7.activityName,
t7.comStatus,
t7.activitytime,
t8.orderId,
t8.coType,
t8.orderSource,
t8.receiptId,
t8.receiptAmount *1.0/100 as receiptAmount,
t8.receiptTradeDate,
t8.achieve_type,
cast(now() as string) as data_created_time
from (
select userId ,loginDate, oid, clientType
from datawarehouse.dwd_lc_LogULogin where isPhysicsDel <> 1 and userId <> '' and loginStatus = 'Y' and oid != '' and userMobile <> '' and loginDate >= DATE_SUB(NOW(),INTERVAL 3 month)
) t1 -- 登录 4,27 4,704
left join (
select distinct dictCode as client_type, dictName as client_type_name
from datawarehouse.dwd_uc_UcDict where dictType = 'ctype'
) t10 on t1.clientType = t10.client_type -- 登录客户端类型
left join (
select registeredDate,userId
from datawarehouse.dwd_lc_LogURegistered where isPhysicsDel <> 1 and userId <> '' and isAbnormal <> 1
) t2 on t1.userId = t2.userId -- 注册 7,42 1,829
left join (
select clueId, clueActivityid, originIdend, clueEseeidend, clueEseeid, projectIdend, projectId, clueEndname, clueLoginid ,originId ,clueWayend ,clueWay
from datawarehouse.dwd_es_behavior_new where clueLoginid <> '' and clueCreateddate >= '2023-01-01'
) t3 on t1.oid = t3.clueLoginid -- 行为 13,37 7,216
left join (
select a.clueId,a.clueStatus,b.pageName from (
select clueId,clueStatus,pageId
from datawarehouse.dwd_es_clueeslog where clueId <> '' and clueCreateddate >= '2023-01-01'
) a
LEFT JOIN (
select pageName,pageId
from datawarehouse.dwd_es_EsPage where pageId <> ''
) b ON a.pageId = b.pageId
) t4 on t3.clueId = t4.clueId -- 线索 13,744,530
left join dws.dws_es_ClueOrigin t4_1 on t3.originIdend = t4_1.origin_id_two -- 当前渠道
left join dws.dws_es_ClueOrigin t4_2 on t3.originId = t4_2.origin_id_two -- 初始渠道
left join (
select a.eeId,a.userName,a.eeNo,b.dptId,c.dptPath
from (
select eeId ,userName ,eeNo
from datawarehouse.dwd_bd_BdEe where eeId <> '' and eeAvlstatus = 'Y' and eeDelstatus = 'N' and isPhysicsDel <> 1
) a
left join (
select dptId ,eeId
from datawarehouse.dwd_bd_BdDptee where dptId <> '' and eeId <> '' and isPhysicsDel <> 1 and dpteeRelation = 'P'
) b on a.eeId = b.eeId
left join (
select dptPath, dptId
from datawarehouse.dwd_bd_BdDpt where dptAvlstatus = 'Y' and dptDelstatus = 'N' and dptId <> '' and isPhysicsDel <> 1
) c on b.dptId = c.dptId
) t5_1 on t3.clueEseeidend = t5_1.eeId -- 当前推广人信息
left join (
select a.eeId,a.userName,a.eeNo,b.dptId,c.dptPath
from (
select eeId ,userName ,eeNo
from datawarehouse.dwd_bd_BdEe where eeId <> '' and eeAvlstatus = 'Y' and eeDelstatus = 'N' and isPhysicsDel <> 1
) a
left join (
select dptId ,eeId
from datawarehouse.dwd_bd_BdDptee where dptId <> '' and eeId <> '' and isPhysicsDel <> 1 and dpteeRelation = 'P'
) b on a.eeId = b.eeId
left join (
select dptPath, dptId
from datawarehouse.dwd_bd_BdDpt where dptAvlstatus = 'Y' and dptDelstatus = 'N' and dptId <> '' and isPhysicsDel <> 1
) c on b.dptId = c.dptId
) t5_2 on t3.clueEseeid = t5_2.eeId -- 初始推广人信息
left join dws.dws_bd_ProjectInfo t6_1 on t3.projectIdend = t6_1.project_id_two -- 当前项目
left join dws.dws_bd_ProjectInfo t6_2 on t3.projectId = t6_2.project_id_two -- 初始项目
left join (
select origindictName,origindictId
from datawarehouse.dwd_es_BdOrigindict where isPhysicsDel <> 1 and origindictId <> '' and origindictAvlstatus = 'Y'
and origindictDelstatus = 'N' and origindictType = 'C'
) t11 on t3.clueWayend = t11.origindictId -- 当前获客方式
left join (
select origindictName,origindictId
from datawarehouse.dwd_es_BdOrigindict where isPhysicsDel <> 1 and origindictId <> '' and origindictAvlstatus = 'Y'
and origindictDelstatus = 'N' and origindictType = 'C'
) t9 on t3.clueWay = t9.origindictId -- 初始获客方式
left join (
select act_type , activityactTypeid ,activityName ,comStatus ,activitytime ,userId
from ads_fineReport.operation_analysis_activity_info oaai where activitytime >= '2023-01-01' and userId <> ''
) t7 on t1.userId = t7.userId -- 用户参与活动信息:活动类型、活动id、活动名称、参加活动时间、参与/完成 状态 完成:1 -- 1,134,252
left join (
select distinct o.userId, o.orderId, o.orderSource, oc.coType, r.receiptId, r.receiptAmount, r.receiptTradeDate,
if(ct.classtypeUsesourcetype in ('E','G'),'系统课','引流课') as achieve_type
from (
select userId, orderId, orderSource
from datawarehouse.dwd_oc_OcOrder where orderAvlstatus = 'Y' and orderDelstatus = 'N' and orderId <> '' and userId <> ''
) o
inner join (
select receiptId, receiptAmount, receiptTradeDate, orderId
from datawarehouse.dwd_oc_OcReceipt where receiptDelstatus = 'N' and receiptAvlstatus = 'Y' and orderId <> ''
) r on o.orderId = r.orderId
left join (
select distinct orderId, skuId
from datawarehouse.dwd_oc_OcOrderitem where oiAvlstatus = 'Y' and oiDelstatus = 'N' and orderId <> ''
) oi on o.orderId = oi.orderId
left join (
select classId , classtypeId
from datawarehouse.dwd_bd_BdClass where classAvlstatus = 'Y' and classDelstatus = 'N' and classId <> '' and classtypeId <> ''
) c on oi.skuId = c.classId
left join (
select distinct classtypeId,classtypeUsesourcetype from datawarehouse.dwd_bd_BdClasstype where classtypeId <> ''
) ct on c.classtypeId = ct.classtypeId
left join (
select coType, orderId from datawarehouse.dwd_oc_OcClassorder where orderId <> ''
) oc on o.orderId = oc.orderId
) t8 on t1.userId = t8.userId -- 用户成单信息:用户id、订单id、订单来源、订单类型、收据id、收据金额、收据时间、订单大类 -- 8,322,819
最新 优化后SQL 语句
-- 【运营数据分析—前置用户登录表】
CREATE TABLE `operation_analysis_data_login_user` (
`userId` varchar(100) NULL COMMENT "用户ID",
`loginDate` varchar(300) NULL COMMENT "登录时间(yyyy-MM-dd HH:mm:ss)",
`oid` varchar(100) NULL COMMENT "行为登录编号(关联行为表)",
`clientType` varchar(300) NULL COMMENT "登录客户端类型",
`user_type` varchar(100) NULL COMMENT "用户类型",
`registeredDate` varchar(300) NULL COMMENT "注册时间(yyyy-MM-dd HH:mm:ss)",
`data_created_time` varchar(600) NULL COMMENT "数据更新时间(yyyy-MM-dd HH:mm:ss)"
) ENGINE=OLAP
DUPLICATE KEY(`userId`)
COMMENT "运营数据分析—前置用户登录表"
DISTRIBUTED BY HASH(`userId`) BUCKETS 8
PROPERTIES (
"replication_num" = "3",
"in_memory" = "false",
"storage_format" = "DEFAULT",
"enable_persistent_index" = "false",
"compression" = "LZ4"
);
-- 调度
insert overwrite ads_fineReport.operation_analysis_data_login_user
select t1.userId , t1.loginDate , t1.oid , t1_1.dictName as client_type_name,
if(seconds_diff(t1.loginDate,if(t2.registeredDate is null,'2021-04-21 00:00:00',t2.registeredDate)) > 600,'1','0') as user_type,
if(t2.registeredDate is null,'2021-04-21 00:00:00',t2.registeredDate) as registeredDate, cast(now() as string) as data_created_time
-- 登录表
from datawarehouse.dwd_lc_LogULogin t1
-- 登录客户端类型
left join datawarehouse.dwd_uc_UcDict t1_1 on t1.clientType = t1_1.dictCode and dictType = 'ctype'
-- 注册表
left join datawarehouse.dwd_lc_LogURegistered t2 on t1.userId = t2.userId and t2.isPhysicsDel = 2 and t2.isAbnormal <> 1
where t1.isPhysicsDel = 2 and t1.userId <> '' and t1.loginStatus = 'Y' and t1.oid != ''
and t1.userMobile <> ''
and t1.loginDate >= DATE_SUB(NOW(),INTERVAL 3 month)
-- ---------------------
-- 【运营数据分析—前置行为线索信息表】
CREATE TABLE `operation_analysis_data_clue` (
`b_clueId` varchar(200) NULL COMMENT "行为线索ID",
`clueLoginid` varchar(100) NULL COMMENT "行为登录编号(关联行为表)",
`b_clueCreateddate` datetime NULL COMMENT "行为线索创建时间",
`clueActivityid` varchar(300) NULL COMMENT "行为表活动id",
`clueEndname` varchar(2000) NULL COMMENT "当前页面名称",
`c_clueId` varchar(200) NULL COMMENT "线索ID",
`c_clueCreateddate` datetime NULL COMMENT "线索创建时间",
`pageName` varchar(3000) NULL COMMENT "推广页名称",
`clueStatus` varchar(100) NULL COMMENT "线索状态",
`originIdend` varchar(100) NULL COMMENT "当前渠道ID",
`end_originName1` varchar(200) NULL COMMENT "当前渠道类型",
`end_originName2` varchar(200) NULL COMMENT "当前渠道名称",
`originId` varchar(100) NULL COMMENT "初始渠道ID",
`originName1` varchar(200) NULL COMMENT "初始渠道类型",
`originName2` varchar(200) NULL COMMENT "初始渠道名称",
`clueEseeidend` varchar(100) NULL COMMENT "当前推广人ID",
`end_userName` varchar(100) NULL COMMENT "当前推广人名称",
`end_eeNo` varchar(100) NULL COMMENT "当前推广人员工编号",
`end_dptId` varchar(100) NULL COMMENT "当前推广人部门ID",
`end_dptName` varchar(1000) NULL COMMENT "当前推广人部门名称",
`clueEseeid` varchar(100) NULL COMMENT "初始推广人ID",
`userName` varchar(100) NULL COMMENT "初始推广人名称",
`eeNo` varchar(100) NULL COMMENT "初始推广人员工编号",
`dptId` varchar(100) NULL COMMENT "初始推广人部门ID",
`dptName` varchar(1000) NULL COMMENT "初始推广人部门名称",
`projectIdend` varchar(200) NULL COMMENT "当前二级项目ID",
`end_projectName2` varchar(200) NULL COMMENT "当前二级项目名称",
`end_projectId1` varchar(200) NULL COMMENT "当前一级项目ID",
`end_projectName1` varchar(200) NULL COMMENT "当前一级项目名称",
`projectId` varchar(200) NULL COMMENT "初始二级项目ID",
`projectName2` varchar(200) NULL COMMENT "初始二级项目名称",
`projectId1` varchar(200) NULL COMMENT "初始一级项目ID",
`projectName1` varchar(200) NULL COMMENT "初始一级项目名称",
`clueWay_ori` varchar(600) NULL COMMENT "初始获客方式",
`clueWay` varchar(600) NULL COMMENT "当前获客方式",
`data_created_time` varchar(600) NULL COMMENT "数据更新时间(yyyy-MM-dd HH:mm:ss)"
) ENGINE=OLAP
DUPLICATE KEY(`b_clueId`)
COMMENT "运营数据分析—前置行为线索信息表"
DISTRIBUTED BY HASH(`b_clueId`) BUCKETS 12
PROPERTIES (
"replication_num" = "3",
"in_memory" = "false",
"storage_format" = "DEFAULT",
"enable_persistent_index" = "false",
"compression" = "LZ4"
);
-- 调度
insert overwrite ads_fineReport.operation_analysis_data_clue
select t1.clueId as b_clueId, t1.clueLoginid ,t1.clueCreateddate ,t1.clueActivityid, t1.clueEndname,
t2.clueId as c_clueId ,t2.clueCreateddate ,t2_1.pageName , if(t2.clueStatus not in ('N','A','O'),'UN',t2.clueStatus) as clueStatus,
t1.originIdend, t1_1.origin_name_one as end_originName1, t1_1.origin_name_two as end_originName2,
t1.originId, t1_2.origin_name_one as originName1,t1_2.origin_name_two as originName2 ,
t1.clueEseeidend, concat(t1_3.user_name ,'(',t1_3.ee_no ,')') as end_userName, t1_3.ee_no as end_eeNo, t1_3.dpt_id_two as end_dptId, t1_3.dpt_path as end_dptName,
t1.clueEseeid, concat(t1_4.user_name ,'(',t1_4.ee_no ,')') as userName, t1_4.ee_no as eeNo, t1_4.dpt_id_two as dptId, t1_4.dpt_path as dptName,
t1.projectIdend, t1_5.project_name_two as end_projectName2, t1_5.project_id_one as end_projectId1, t1_5.project_name_one as end_projectName1,
t1.projectId, t1_6.project_name_two as projectName2, t1_6.project_id_one as projectId1, t1_6.project_name_one as projectName1,
t1_7.origindictName as clueWay_ori, t1_8.origindictName as clueWay,
cast(now() as string) as data_created_time
-- 行为表
from datawarehouse.dwd_es_behavior_new t1
-- 推广中心-线索表
left join datawarehouse.dwd_es_clueeslog t2 on t1.clueId = t2.clueId and t2.clueCreateddate >= '2023-01-01'
-- 推广页面
left join datawarehouse.dwd_es_EsPage t2_1 on t2.pageId = t2_1.pageId
-- 当前渠道
left join dws.dws_es_ClueOrigin t1_1 on t1.originIdend = t1_1.origin_id_two
-- 初始渠道
left join dws.dws_es_ClueOrigin t1_2 on t1.originId = t1_2.origin_id_two
-- 当前推广人信息
left join dws.dws_bd_EeDpt t1_3 on t1.clueEseeidend = t1_3.ee_id
-- 初始推广人信息
left join dws.dws_bd_EeDpt t1_4 on t1.clueEseeid = t1_4.ee_id
-- 当前项目
left join dws.dws_bd_ProjectInfo t1_5 on t1.projectIdend = t1_5.project_id_two
-- 初始项目
left join dws.dws_bd_ProjectInfo t1_6 on t1.projectId = t1_6.project_id_two
-- 当前获客方式
left join datawarehouse.dwd_es_BdOrigindict t1_7 on t1.clueWayend = t1_7.origindictId
and t1_7.origindictAvlstatus = 'Y' and t1_7.origindictDelstatus = 'N' and t1_7.origindictType = 'C'
-- 初始获客方式
left join datawarehouse.dwd_es_BdOrigindict t1_8 on t1.clueWay = t1_8.origindictId
and t1_8.origindictAvlstatus = 'Y' and t1_8.origindictDelstatus = 'N' and t1_8.origindictType = 'C'
where t1.clueCreateddate >= DATE_SUB(NOW(),INTERVAL 12 month) and t1.clueLoginid <> ''
-- ---------------------
-- 【运营数据分析—前置订单表】
CREATE TABLE `operation_analysis_data_order` (
`userId` varchar(100) NULL COMMENT "用户ID",
`orderId` varchar(600) NULL COMMENT "订单ID",
`orderCreateddate` varchar(300) NULL COMMENT "订单创建时间(yyyy-MM-dd HH:mm:ss)",
`orderSource` varchar(100) NULL COMMENT "订单来源",
`coType` varchar(100) NULL COMMENT "订单类型",
`receiptId` varchar(600) NULL COMMENT "收据ID",
`receiptAmount` decimal(18, 4) NULL COMMENT "收据金额",
`receiptTradeDate` varchar(600) NULL COMMENT "收据日期(yyyy-MM-dd HH:mm:ss)",
`achieve_type` varchar(100) NULL COMMENT "正价课类型",
`data_created_time` varchar(600) NULL COMMENT "数据更新时间(yyyy-MM-dd HH:mm:ss)"
) ENGINE=OLAP
DUPLICATE KEY(`userId`)
COMMENT "运营数据分析—前置订单表"
DISTRIBUTED BY HASH(`userId`) BUCKETS 8
PROPERTIES (
"replication_num" = "3",
"in_memory" = "false",
"storage_format" = "DEFAULT",
"enable_persistent_index" = "false",
"compression" = "LZ4"
);
-- 调度
insert overwrite ads_fineReport.operation_analysis_data_order
select t2.userId ,t2.orderId ,t2.orderCreateddate ,t2.orderSource ,t3.coType ,t1.receiptId , t1.receiptAmount *1.0/100 , t1.receiptTradeDate,
if(t6.classtypeUsesourcetype in ('E','G'),'系统课','引流课') as achieve_type, cast(now() as string) as data_created_time
-- 收据表
from datawarehouse.dwd_oc_OcReceipt t1
-- 订单表
join datawarehouse.dwd_oc_OcOrder t2 on t1.orderId = t2.orderId
-- 班级订单表 【订单类型】
left join datawarehouse.dwd_oc_OcClassorder t3 on t2.orderId = t3.orderId
-- 订单项表 【sku -- 班级id】
left join datawarehouse.dwd_oc_OcOrderitem t4 on t2.orderId = t4.orderId and t4.oiAvlstatus = 'Y' and t4.oiDelstatus = 'N'
-- 班级表
left join datawarehouse.dwd_bd_BdClass t5 on t4.skuId = t5.classId and t5.classAvlstatus = 'Y' and t5.classDelstatus = 'N'
-- 班型表
left join datawarehouse.dwd_bd_BdClasstype t6 on t5.classtypeId = t6.classtypeId
where t1.receiptDelstatus = 'N' and t1.receiptAvlstatus = 'Y'
and t2.orderDelstatus = 'N' and t2.orderAvlstatus = 'Y' and t2.userId <> ''
and t1.receiptTradeDate >= '2023-01-01'
group by t2.userId ,t2.orderId ,t2.orderCreateddate, t2.orderSource ,t3.coType ,t1.receiptId , t1.receiptAmount , t1.receiptTradeDate,
if(t6.classtypeUsesourcetype in ('E','G'),'系统课','引流课')
-- --------------
-- 替换对应前置表
-- 本节点类型适用于MySQL数据源
SET query_timeout = 1000;
insert overwrite ads_fineReport.operation_analysis_data
select
t1.userId,
t1.loginDate,
t1.registeredDate,
t1.user_type,
t1.clientType,
t2.b_clueId ,
t2.c_clueId ,
t2.clueActivityid ,
t2.pageName ,
t2.originIdend ,
t2.end_originName1 ,
t2.end_originName2 ,
t2.originId ,
t2.originName1 ,
t2.originName2 ,
t2.clueWay_ori ,
t2.clueWay ,
t2.clueEseeidend ,
t2.end_userName ,
t2.end_eeNo ,
t2.end_dptId ,
t2.end_dptName ,
t2.clueEseeid ,
t2.userName ,
t2.eeNo ,
t2.dptId ,
t2.dptName ,
t2.projectIdend ,
t2.end_projectName2 ,
t2.end_projectId1 ,
t2.end_projectName1 ,
t2.projectId ,
t2.projectName2 ,
t2.projectId1 ,
t2.projectName1 ,
t2.clueEndname ,
t2.clueStatus ,
t3.act_type,
t3.activityactTypeid,
t3.activityName,
t3.comStatus,
t3.activitytime,
t4.orderId,
t4.coType,
t4.orderSource,
t4.receiptId,
t4.receiptAmount ,
t4.receiptTradeDate,
t4.achieve_type,
cast(now() as string) as data_created_time
-- 【运营数据分析—前置用户登录表】
from ads_fineReport.operation_analysis_data_login_user t1
-- 【运营数据分析—前置行为线索信息表】
left join ads_fineReport.operation_analysis_data_clue t2 on t1.oid = t2.clueLoginid
-- 【用户参与活动信息表】 活动类型、活动id、活动名称、参加活动时间、参与/完成 状态 完成:1
left join (
select act_type , activityactTypeid ,activityName ,comStatus ,activitytime ,userId
from ads_fineReport.operation_analysis_activity_info oaai where activitytime >= '2023-01-01' and userId <> ''
) t3 on t1.userId = t3.userId
-- 【运营数据分析—前置订单表】
left join ads_fineReport.operation_analysis_data_order t4 on t1.userId = t4.userId
group by
t1.userId, t1.loginDate, t1.registeredDate, t1.user_type, t1.clientType,
t2.b_clueId , t2.c_clueId , t2.clueActivityid , t2.pageName , t2.originIdend , t2.end_originName1 ,
t2.end_originName2 , t2.originId , t2.originName1 , t2.originName2 , t2.clueWay_ori , t2.clueWay ,
t2.clueEseeidend , t2.end_userName , t2.end_eeNo , t2.end_dptId , t2.end_dptName , t2.clueEseeid ,
t2.userName , t2.eeNo , t2.dptId , t2.dptName , t2.projectIdend , t2.end_projectName2 , t2.end_projectId1 ,
t2.end_projectName1 , t2.projectId , t2.projectName2 , t2.projectId1 , t2.projectName1 , t2.clueEndname , t2.clueStatus ,
t3.act_type, t3.activityactTypeid, t3.activityName, t3.comStatus, t3.activitytime,
t4.orderId, t4.coType, t4.orderSource, t4.receiptId, t4.receiptAmount , t4.receiptTradeDate, t4.achieve_type