StarRocks-性能优化【数据治理06】

[10]sql优化-ads_fineReport.operation_analysis_data

原始SQL 语句
-- 本节点类型适用于MySQL数据源
SET query_timeout = 1000;
insert overwrite ads_fineReport.operation_analysis_data                                                                                          
select distinct
t1.userId,
t1.loginDate,
if(t2.registeredDate is null,'2021-04-21 00:00:00',t2.registeredDate) as registeredDate,
if(seconds_diff(t1.loginDate,if(t2.registeredDate is null,'2021-04-21 00:00:00',t2.registeredDate)) > 600,'1','0') as user_type,
t10.client_type_name,
t3.clueId as b_clueId,
t4.clueId as c_clueId,
t3.clueActivityid,
t4.pageName,
t3.originIdend,
t4_1.origin_name_one as end_originName1,
t4_1.origin_name_two as end_originName2,
t3.originId,
t4_2.origin_name_one as originName1,
t4_2.origin_name_two as originName2,
t11.origindictName,
t9.origindictName,
t3.clueEseeidend,
concat(t5_1.userName,'(',t5_1.eeNo,')') as end_userName,
t5_1.eeNo as end_eeNo,
t5_1.dptId as end_dptId,
t5_1.dptPath as end_dptName,
t3.clueEseeid,
concat(t5_2.userName,'(',t5_2.eeNo,')') as userName,
t5_2.eeNo,
t5_2.dptId,
t5_2.dptPath as dptName,
t3.projectIdend,
t6_1.project_name_two as end_projectName2,
t6_1.project_id_one as end_projectId1,
t6_1.project_name_one as end_projectName1,
t3.projectId,
t6_2.project_name_two as projectName2,
t6_2.project_id_one as projectId1,
t6_2.project_name_one as projectName1,
t3.clueEndname,
if(t4.clueStatus not in ('N','A','O'),'UN',t4.clueStatus) as clueStatus,
t7.act_type,
t7.activityactTypeid,
t7.activityName,
t7.comStatus,
t7.activitytime,
t8.orderId,
t8.coType,
t8.orderSource,
t8.receiptId,
t8.receiptAmount *1.0/100 as receiptAmount,
t8.receiptTradeDate,
t8.achieve_type,
cast(now() as string) as data_created_time
from (
    select userId ,loginDate, oid, clientType
    from datawarehouse.dwd_lc_LogULogin where isPhysicsDel <> 1 and userId <> '' and loginStatus = 'Y' and oid != '' and userMobile <> '' and loginDate >= DATE_SUB(NOW(),INTERVAL 3 month)
) t1 -- 登录  4,27 4,704
left join (
    select distinct dictCode as client_type, dictName as client_type_name
    from datawarehouse.dwd_uc_UcDict where dictType = 'ctype'
) t10 on t1.clientType = t10.client_type  -- 登录客户端类型

left join (
    select registeredDate,userId
    from datawarehouse.dwd_lc_LogURegistered where isPhysicsDel <> 1 and userId <> '' and isAbnormal <> 1
) t2 on t1.userId = t2.userId  -- 注册  7,42 1,829

left join (
    select clueId, clueActivityid, originIdend, clueEseeidend, clueEseeid, projectIdend, projectId, clueEndname, clueLoginid ,originId ,clueWayend ,clueWay
    from datawarehouse.dwd_es_behavior_new where clueLoginid <> '' and clueCreateddate >= '2023-01-01'
) t3 on t1.oid = t3.clueLoginid -- 行为  13,37 7,216

left join (
    select a.clueId,a.clueStatus,b.pageName from (
        select clueId,clueStatus,pageId
        from datawarehouse.dwd_es_clueeslog where clueId <> '' and clueCreateddate >= '2023-01-01'
    ) a
    LEFT JOIN (
        select pageName,pageId
        from datawarehouse.dwd_es_EsPage where pageId <> ''
    ) b ON a.pageId = b.pageId
) t4 on t3.clueId = t4.clueId -- 线索 13,744,530

left join dws.dws_es_ClueOrigin t4_1 on t3.originIdend = t4_1.origin_id_two  -- 当前渠道
left join dws.dws_es_ClueOrigin t4_2 on t3.originId = t4_2.origin_id_two  -- 初始渠道
left join (
    select a.eeId,a.userName,a.eeNo,b.dptId,c.dptPath
    from (
        select eeId ,userName ,eeNo
        from datawarehouse.dwd_bd_BdEe where eeId <> '' and eeAvlstatus = 'Y' and eeDelstatus = 'N' and isPhysicsDel <> 1
    ) a
    left join (
        select dptId ,eeId
        from datawarehouse.dwd_bd_BdDptee where dptId <> '' and eeId <> '' and isPhysicsDel <> 1 and dpteeRelation = 'P'
    ) b on a.eeId = b.eeId
    left join (
        select dptPath, dptId
        from datawarehouse.dwd_bd_BdDpt where dptAvlstatus = 'Y' and dptDelstatus = 'N' and dptId <> '' and isPhysicsDel <> 1
    ) c    on b.dptId = c.dptId
) t5_1 on t3.clueEseeidend = t5_1.eeId  -- 当前推广人信息
left join (
    select a.eeId,a.userName,a.eeNo,b.dptId,c.dptPath
    from (
        select eeId ,userName ,eeNo
        from datawarehouse.dwd_bd_BdEe where eeId <> '' and eeAvlstatus = 'Y' and eeDelstatus = 'N' and isPhysicsDel <> 1
    ) a
    left join (
        select dptId ,eeId
        from datawarehouse.dwd_bd_BdDptee where dptId <> '' and eeId <> '' and isPhysicsDel <> 1 and dpteeRelation = 'P'
    ) b on a.eeId = b.eeId
    left join (
        select dptPath, dptId
        from datawarehouse.dwd_bd_BdDpt where dptAvlstatus = 'Y' and dptDelstatus = 'N' and dptId <> '' and isPhysicsDel <> 1
    ) c    on b.dptId = c.dptId
) t5_2 on t3.clueEseeid = t5_2.eeId  -- 初始推广人信息

left join dws.dws_bd_ProjectInfo t6_1 on t3.projectIdend = t6_1.project_id_two  -- 当前项目
left join dws.dws_bd_ProjectInfo t6_2 on t3.projectId = t6_2.project_id_two  -- 初始项目

left join (
    select origindictName,origindictId
    from datawarehouse.dwd_es_BdOrigindict where isPhysicsDel <> 1 and origindictId <> '' and origindictAvlstatus = 'Y'
    and origindictDelstatus = 'N' and origindictType = 'C'
) t11 on t3.clueWayend = t11.origindictId  -- 当前获客方式

left join (
    select origindictName,origindictId
    from datawarehouse.dwd_es_BdOrigindict where isPhysicsDel <> 1 and origindictId <> '' and origindictAvlstatus = 'Y'
    and origindictDelstatus = 'N' and origindictType = 'C'
) t9 on t3.clueWay = t9.origindictId  -- 初始获客方式

left join (
    select act_type , activityactTypeid ,activityName ,comStatus ,activitytime ,userId
    from ads_fineReport.operation_analysis_activity_info oaai where activitytime >= '2023-01-01' and userId <> ''
) t7 on t1.userId = t7.userId  -- 用户参与活动信息:活动类型、活动id、活动名称、参加活动时间、参与/完成 状态  完成:1   -- 1,134,252

left join (
    select distinct o.userId, o.orderId, o.orderSource, oc.coType, r.receiptId, r.receiptAmount, r.receiptTradeDate,
    if(ct.classtypeUsesourcetype in ('E','G'),'系统课','引流课') as achieve_type
    from (
        select  userId, orderId, orderSource
        from datawarehouse.dwd_oc_OcOrder where orderAvlstatus = 'Y' and orderDelstatus = 'N' and orderId <> '' and userId <> ''
    ) o
    inner join (
        select  receiptId, receiptAmount, receiptTradeDate, orderId
        from datawarehouse.dwd_oc_OcReceipt where receiptDelstatus = 'N' and receiptAvlstatus = 'Y' and orderId <> ''
    ) r on o.orderId = r.orderId
    left join (
        select distinct orderId, skuId
        from datawarehouse.dwd_oc_OcOrderitem where oiAvlstatus = 'Y' and oiDelstatus = 'N' and orderId <> ''
    ) oi on o.orderId = oi.orderId
    left join (
        select classId , classtypeId
        from datawarehouse.dwd_bd_BdClass where classAvlstatus = 'Y' and classDelstatus = 'N' and classId <> '' and classtypeId <> ''
    ) c on oi.skuId = c.classId
    left join (
        select distinct classtypeId,classtypeUsesourcetype from datawarehouse.dwd_bd_BdClasstype where classtypeId <> ''
    ) ct on c.classtypeId = ct.classtypeId
    left join (
        select coType, orderId from datawarehouse.dwd_oc_OcClassorder where orderId <> ''
    ) oc on o.orderId = oc.orderId
) t8 on t1.userId = t8.userId  --  用户成单信息:用户id、订单id、订单来源、订单类型、收据id、收据金额、收据时间、订单大类  -- 8,322,819
最新 优化后SQL 语句
-- 【运营数据分析—前置用户登录表】 
CREATE TABLE `operation_analysis_data_login_user` (
  `userId` varchar(100) NULL COMMENT "用户ID",
  `loginDate` varchar(300) NULL COMMENT "登录时间(yyyy-MM-dd HH:mm:ss)",
  `oid` varchar(100) NULL COMMENT "行为登录编号(关联行为表)",
  `clientType` varchar(300) NULL COMMENT "登录客户端类型",
  `user_type` varchar(100) NULL COMMENT "用户类型",
  `registeredDate` varchar(300) NULL COMMENT "注册时间(yyyy-MM-dd HH:mm:ss)",
  `data_created_time` varchar(600) NULL COMMENT "数据更新时间(yyyy-MM-dd HH:mm:ss)"
) ENGINE=OLAP 
DUPLICATE KEY(`userId`)
COMMENT "运营数据分析—前置用户登录表"
DISTRIBUTED BY HASH(`userId`) BUCKETS 8 
PROPERTIES (
"replication_num" = "3",
"in_memory" = "false",
"storage_format" = "DEFAULT",
"enable_persistent_index" = "false",
"compression" = "LZ4"
);

-- 调度
insert overwrite ads_fineReport.operation_analysis_data_login_user
select t1.userId , t1.loginDate , t1.oid , t1_1.dictName as client_type_name,
if(seconds_diff(t1.loginDate,if(t2.registeredDate is null,'2021-04-21 00:00:00',t2.registeredDate)) > 600,'1','0') as user_type,
if(t2.registeredDate is null,'2021-04-21 00:00:00',t2.registeredDate) as registeredDate, cast(now() as string) as data_created_time
-- 登录表
from  datawarehouse.dwd_lc_LogULogin t1
-- 登录客户端类型
left join datawarehouse.dwd_uc_UcDict t1_1 on t1.clientType = t1_1.dictCode and dictType = 'ctype'
-- 注册表
left join datawarehouse.dwd_lc_LogURegistered t2 on t1.userId = t2.userId and t2.isPhysicsDel = 2 and t2.isAbnormal <> 1
where t1.isPhysicsDel = 2 and t1.userId <> '' and t1.loginStatus = 'Y' and t1.oid != '' 
and t1.userMobile <> '' 
and t1.loginDate >= DATE_SUB(NOW(),INTERVAL 3 month)


-- ---------------------
-- 【运营数据分析—前置行为线索信息表】 
CREATE TABLE `operation_analysis_data_clue` (
  `b_clueId` varchar(200) NULL COMMENT "行为线索ID",
  `clueLoginid` varchar(100) NULL COMMENT "行为登录编号(关联行为表)",
  `b_clueCreateddate` datetime NULL COMMENT "行为线索创建时间",
  `clueActivityid` varchar(300) NULL COMMENT "行为表活动id",
  `clueEndname` varchar(2000) NULL COMMENT "当前页面名称",
  `c_clueId` varchar(200) NULL COMMENT "线索ID",
  `c_clueCreateddate` datetime NULL COMMENT "线索创建时间",
  `pageName` varchar(3000) NULL COMMENT "推广页名称",
  `clueStatus` varchar(100) NULL COMMENT "线索状态",
  `originIdend` varchar(100) NULL COMMENT "当前渠道ID",
  `end_originName1` varchar(200) NULL COMMENT "当前渠道类型",
  `end_originName2` varchar(200) NULL COMMENT "当前渠道名称",
  `originId` varchar(100) NULL COMMENT "初始渠道ID",
  `originName1` varchar(200) NULL COMMENT "初始渠道类型",
  `originName2` varchar(200) NULL COMMENT "初始渠道名称",
  `clueEseeidend` varchar(100) NULL COMMENT "当前推广人ID",
  `end_userName` varchar(100) NULL COMMENT "当前推广人名称",
  `end_eeNo` varchar(100) NULL COMMENT "当前推广人员工编号",
  `end_dptId` varchar(100) NULL COMMENT "当前推广人部门ID",
  `end_dptName` varchar(1000) NULL COMMENT "当前推广人部门名称",
  `clueEseeid` varchar(100) NULL COMMENT "初始推广人ID",
  `userName` varchar(100) NULL COMMENT "初始推广人名称",
  `eeNo` varchar(100) NULL COMMENT "初始推广人员工编号",
  `dptId` varchar(100) NULL COMMENT "初始推广人部门ID",
  `dptName` varchar(1000) NULL COMMENT "初始推广人部门名称",
  `projectIdend` varchar(200) NULL COMMENT "当前二级项目ID",
  `end_projectName2` varchar(200) NULL COMMENT "当前二级项目名称",
  `end_projectId1` varchar(200) NULL COMMENT "当前一级项目ID",
  `end_projectName1` varchar(200) NULL COMMENT "当前一级项目名称",
  `projectId` varchar(200) NULL COMMENT "初始二级项目ID",
  `projectName2` varchar(200) NULL COMMENT "初始二级项目名称",
  `projectId1` varchar(200) NULL COMMENT "初始一级项目ID",
  `projectName1` varchar(200) NULL COMMENT "初始一级项目名称",
  `clueWay_ori` varchar(600) NULL COMMENT "初始获客方式",
  `clueWay` varchar(600) NULL COMMENT "当前获客方式",
  `data_created_time` varchar(600) NULL COMMENT "数据更新时间(yyyy-MM-dd HH:mm:ss)"
) ENGINE=OLAP 
DUPLICATE KEY(`b_clueId`)
COMMENT "运营数据分析—前置行为线索信息表"
DISTRIBUTED BY HASH(`b_clueId`) BUCKETS 12 
PROPERTIES (
"replication_num" = "3",
"in_memory" = "false",
"storage_format" = "DEFAULT",
"enable_persistent_index" = "false",
"compression" = "LZ4"
);

-- 调度 
insert overwrite ads_fineReport.operation_analysis_data_clue
select t1.clueId as b_clueId, t1.clueLoginid ,t1.clueCreateddate ,t1.clueActivityid, t1.clueEndname,
t2.clueId as c_clueId ,t2.clueCreateddate ,t2_1.pageName , if(t2.clueStatus not in ('N','A','O'),'UN',t2.clueStatus) as clueStatus, 
t1.originIdend, t1_1.origin_name_one as end_originName1, t1_1.origin_name_two as end_originName2,
t1.originId, t1_2.origin_name_one as originName1,t1_2.origin_name_two as originName2 ,
t1.clueEseeidend, concat(t1_3.user_name ,'(',t1_3.ee_no ,')') as end_userName, t1_3.ee_no as end_eeNo, t1_3.dpt_id_two as end_dptId, t1_3.dpt_path as end_dptName,
t1.clueEseeid, concat(t1_4.user_name ,'(',t1_4.ee_no ,')') as userName, t1_4.ee_no as eeNo, t1_4.dpt_id_two as dptId, t1_4.dpt_path as dptName,
t1.projectIdend, t1_5.project_name_two as end_projectName2, t1_5.project_id_one as end_projectId1, t1_5.project_name_one as end_projectName1,
t1.projectId, t1_6.project_name_two as projectName2, t1_6.project_id_one as projectId1, t1_6.project_name_one as projectName1,
t1_7.origindictName as clueWay_ori, t1_8.origindictName as clueWay,
cast(now() as string) as data_created_time
-- 行为表
from datawarehouse.dwd_es_behavior_new t1
-- 推广中心-线索表
left join datawarehouse.dwd_es_clueeslog t2 on t1.clueId = t2.clueId and t2.clueCreateddate >= '2023-01-01'
-- 推广页面
left join datawarehouse.dwd_es_EsPage t2_1 on t2.pageId = t2_1.pageId
-- 当前渠道
left join dws.dws_es_ClueOrigin t1_1 on t1.originIdend = t1_1.origin_id_two 
-- 初始渠道
left join dws.dws_es_ClueOrigin t1_2 on t1.originId = t1_2.origin_id_two        
-- 当前推广人信息
left join dws.dws_bd_EeDpt t1_3 on t1.clueEseeidend = t1_3.ee_id                
-- 初始推广人信息
left join dws.dws_bd_EeDpt t1_4 on t1.clueEseeid = t1_4.ee_id           
-- 当前项目
left join dws.dws_bd_ProjectInfo t1_5 on t1.projectIdend = t1_5.project_id_two  
-- 初始项目
left join dws.dws_bd_ProjectInfo t1_6 on t1.projectId = t1_6.project_id_two    
-- 当前获客方式
left join datawarehouse.dwd_es_BdOrigindict t1_7 on t1.clueWayend = t1_7.origindictId 
	and t1_7.origindictAvlstatus = 'Y' and t1_7.origindictDelstatus = 'N' and t1_7.origindictType = 'C'
-- 初始获客方式
left join datawarehouse.dwd_es_BdOrigindict t1_8 on t1.clueWay = t1_8.origindictId 
	and t1_8.origindictAvlstatus = 'Y' and t1_8.origindictDelstatus = 'N' and t1_8.origindictType = 'C' 
where t1.clueCreateddate >= DATE_SUB(NOW(),INTERVAL 12 month) and t1.clueLoginid <> '' 


-- ---------------------
-- 【运营数据分析—前置订单表】 
CREATE TABLE `operation_analysis_data_order` (
  `userId` varchar(100) NULL COMMENT "用户ID",
  `orderId` varchar(600) NULL COMMENT "订单ID",
  `orderCreateddate` varchar(300) NULL COMMENT "订单创建时间(yyyy-MM-dd HH:mm:ss)",
  `orderSource` varchar(100) NULL COMMENT "订单来源",
  `coType` varchar(100) NULL COMMENT "订单类型",
  `receiptId` varchar(600) NULL COMMENT "收据ID",
  `receiptAmount` decimal(18, 4) NULL COMMENT "收据金额",
  `receiptTradeDate` varchar(600) NULL COMMENT "收据日期(yyyy-MM-dd HH:mm:ss)",
  `achieve_type` varchar(100) NULL COMMENT "正价课类型",
  `data_created_time` varchar(600) NULL COMMENT "数据更新时间(yyyy-MM-dd HH:mm:ss)"
) ENGINE=OLAP 
DUPLICATE KEY(`userId`)
COMMENT "运营数据分析—前置订单表"
DISTRIBUTED BY HASH(`userId`) BUCKETS 8 
PROPERTIES (
"replication_num" = "3",
"in_memory" = "false",
"storage_format" = "DEFAULT",
"enable_persistent_index" = "false",
"compression" = "LZ4"
);

-- 调度
insert overwrite ads_fineReport.operation_analysis_data_order
select t2.userId ,t2.orderId ,t2.orderCreateddate ,t2.orderSource ,t3.coType ,t1.receiptId , t1.receiptAmount *1.0/100 , t1.receiptTradeDate,
if(t6.classtypeUsesourcetype in ('E','G'),'系统课','引流课') as achieve_type, cast(now() as string) as data_created_time
-- 收据表
from datawarehouse.dwd_oc_OcReceipt t1 
-- 订单表
join datawarehouse.dwd_oc_OcOrder t2 on t1.orderId = t2.orderId 
-- 班级订单表  【订单类型】
left join datawarehouse.dwd_oc_OcClassorder t3 on t2.orderId = t3.orderId 
-- 订单项表 【sku -- 班级id】
left join datawarehouse.dwd_oc_OcOrderitem t4 on t2.orderId = t4.orderId and t4.oiAvlstatus = 'Y' and t4.oiDelstatus = 'N'
-- 班级表
left join datawarehouse.dwd_bd_BdClass t5 on t4.skuId = t5.classId and t5.classAvlstatus = 'Y' and t5.classDelstatus = 'N'
-- 班型表
left join datawarehouse.dwd_bd_BdClasstype t6 on t5.classtypeId = t6.classtypeId 
where t1.receiptDelstatus = 'N' and t1.receiptAvlstatus = 'Y' 
and t2.orderDelstatus = 'N' and t2.orderAvlstatus = 'Y' and t2.userId <> ''
and t1.receiptTradeDate >= '2023-01-01'
group by t2.userId ,t2.orderId ,t2.orderCreateddate, t2.orderSource ,t3.coType ,t1.receiptId , t1.receiptAmount , t1.receiptTradeDate,
if(t6.classtypeUsesourcetype in ('E','G'),'系统课','引流课')


-- --------------
-- 替换对应前置表
-- 本节点类型适用于MySQL数据源
SET query_timeout = 1000;
insert overwrite ads_fineReport.operation_analysis_data                                                                                          
select 
t1.userId, 
t1.loginDate, 
t1.registeredDate, 
t1.user_type, 
t1.clientType,
t2.b_clueId , 
t2.c_clueId , 
t2.clueActivityid , 
t2.pageName , 
t2.originIdend , 
t2.end_originName1 , 
t2.end_originName2 ,
t2.originId ,
t2.originName1 ,
t2.originName2 , 
t2.clueWay_ori , 
t2.clueWay ,
t2.clueEseeidend , 
t2.end_userName , 
t2.end_eeNo , 
t2.end_dptId , 
t2.end_dptName ,
t2.clueEseeid , 
t2.userName , 
t2.eeNo , 
t2.dptId , 
t2.dptName ,
t2.projectIdend ,
t2.end_projectName2 ,
t2.end_projectId1 ,
t2.end_projectName1 ,
t2.projectId ,
t2.projectName2 ,
t2.projectId1 ,
t2.projectName1 ,
t2.clueEndname ,
t2.clueStatus ,
t3.act_type,
t3.activityactTypeid,
t3.activityName,
t3.comStatus,
t3.activitytime,
t4.orderId,
t4.coType,
t4.orderSource,
t4.receiptId,
t4.receiptAmount ,
t4.receiptTradeDate,
t4.achieve_type,
cast(now() as string) as data_created_time
-- 【运营数据分析—前置用户登录表】 
from ads_fineReport.operation_analysis_data_login_user t1 
-- 【运营数据分析—前置行为线索信息表】 
left join ads_fineReport.operation_analysis_data_clue t2 on t1.oid = t2.clueLoginid
-- 【用户参与活动信息表】 活动类型、活动id、活动名称、参加活动时间、参与/完成 状态  完成:1 
left join (
    select act_type , activityactTypeid ,activityName ,comStatus ,activitytime ,userId
    from ads_fineReport.operation_analysis_activity_info oaai where activitytime >= '2023-01-01' and userId <> ''
) t3 on t1.userId = t3.userId  
-- 【运营数据分析—前置订单表】 
left join ads_fineReport.operation_analysis_data_order t4 on t1.userId = t4.userId 
group by 
t1.userId, t1.loginDate, t1.registeredDate, t1.user_type, t1.clientType,
t2.b_clueId , t2.c_clueId , t2.clueActivityid , t2.pageName , t2.originIdend , t2.end_originName1 , 
t2.end_originName2 , t2.originId , t2.originName1 , t2.originName2 , t2.clueWay_ori , t2.clueWay ,
t2.clueEseeidend , t2.end_userName , t2.end_eeNo , t2.end_dptId , t2.end_dptName , t2.clueEseeid , 
t2.userName , t2.eeNo , t2.dptId , t2.dptName , t2.projectIdend , t2.end_projectName2 , t2.end_projectId1 ,
t2.end_projectName1 , t2.projectId , t2.projectName2 , t2.projectId1 , t2.projectName1 , t2.clueEndname , t2.clueStatus , 
t3.act_type, t3.activityactTypeid, t3.activityName, t3.comStatus, t3.activitytime,
t4.orderId, t4.coType, t4.orderSource, t4.receiptId, t4.receiptAmount , t4.receiptTradeDate, t4.achieve_type

posted on 2025-02-24 14:39  cloud_wh  阅读(46)  评论(0)    收藏  举报

导航