
1. ADS Layer
- Methodology for statistical metrics:
  - Atomic metrics: order amount, payment amount
  - Derived metric = atomic metric + statistical period + business qualifier + statistical dimension
  - For example, "payment amount per user per day" = the atomic metric "payment amount" + the period "per day" + the dimension "user".
First, create the database gma_ads in MySQL.
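A minimal statement for this step (the utf8 charset is an assumption, chosen to match the table DDL below):

CREATE DATABASE IF NOT EXISTS gma_ads DEFAULT CHARSET utf8;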
1. Payment Amount
- Payment amount per user per day, in real time
- Payment amount per province per day, in real time
- Payment amount per payment method per day, in real time (a sketch follows the two queries below)
- Payment amount per region per day, in real time
- Payment amount per gender per day, in real time
-- 1. Create the MySQL sink table
-- Flink SQL JDBC sink table
CREATE TABLE gma_ads.ads_mysql_user_day_sum_payment_price (
    user_id BIGINT,
    day_id STRING,
    sum_payment_price decimal(10,2),
    PRIMARY KEY (user_id,day_id) NOT ENFORCED -- rows are upserted by primary key
) WITH (
    'connector' = 'jdbc',
    'url' = 'jdbc:mysql://master:3306/gma_ads?useUnicode=true&characterEncoding=UTF-8',
    'table-name' = 'ads_mysql_user_day_sum_payment_price', -- the table must be created manually in MySQL
    'username' = 'root',
    'password' = '123456'
);

-- Create the table in MySQL
CREATE TABLE `ads_mysql_user_day_sum_payment_price` (
  `user_id` BIGINT NOT NULL,
  `day_id` varchar(255) NOT NULL,
  `sum_payment_price` decimal(10,2),
  PRIMARY KEY (`user_id`,`day_id`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8;

-- Real-time aggregation: payment amount per user per day
insert into gma_ads.ads_mysql_user_day_sum_payment_price
select
    user_id,
    substr(payment_time,1,10) as day_id,
    sum(payment_price) as sum_payment_price
from gma_dwd.dwd_kafka_payment_info
group by user_id,substr(payment_time,1,10);
-- 1. Create the MySQL sink table
-- Flink SQL JDBC sink table
CREATE TABLE gma_ads.ads_mysql_proc_day_sum_payment_price (
    pro_name STRING,
    day_id STRING,
    sum_payment_price decimal(10,2),
    PRIMARY KEY (pro_name,day_id) NOT ENFORCED -- rows are upserted by primary key
) WITH (
    'connector' = 'jdbc',
    'url' = 'jdbc:mysql://master:3306/gma_ads?useUnicode=true&characterEncoding=UTF-8',
    'table-name' = 'ads_mysql_proc_day_sum_payment_price', -- the table must be created manually in MySQL
    'username' = 'root',
    'password' = '123456'
);

-- Create the table in MySQL
CREATE TABLE `ads_mysql_proc_day_sum_payment_price` (
  `pro_name` varchar(255) NOT NULL,
  `day_id` varchar(255) NOT NULL,
  `sum_payment_price` decimal(10,2),
  PRIMARY KEY (`pro_name`,`day_id`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8;

-- Real-time aggregation: payment amount per province per day
-- Lookup join against the dimension table to get the province name
insert into gma_ads.ads_mysql_proc_day_sum_payment_price
select
    b.info.pro_name as pro_name,
    substr(payment_time,1,10) as day_id,
    sum(payment_price) as sum_payment_price
from gma_dwd.dwd_kafka_payment_info as a
LEFT JOIN
gma_dim.dim_hbase_region FOR SYSTEM_TIME AS OF a.proc_time as b
on a.province_id=b.pro_id
group by b.info.pro_name,substr(payment_time,1,10);
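The remaining metrics in the list follow the same pattern. As an illustration, a minimal sketch for the per-payment-method metric, assuming dwd_kafka_payment_info carries a payment_type column (not confirmed by this section); the region and gender variants would instead join dim_hbase_region or a user dimension table:

-- Hedged sketch: daily payment amount per payment method.
-- Assumes a payment_type column on dwd_kafka_payment_info; the MySQL table
-- ads_mysql_paytype_day_sum_payment_price must be created manually, as above.
CREATE TABLE gma_ads.ads_mysql_paytype_day_sum_payment_price (
    payment_type STRING,
    day_id STRING,
    sum_payment_price decimal(10,2),
    PRIMARY KEY (payment_type,day_id) NOT ENFORCED -- rows are upserted by primary key
) WITH (
    'connector' = 'jdbc',
    'url' = 'jdbc:mysql://master:3306/gma_ads?useUnicode=true&characterEncoding=UTF-8',
    'table-name' = 'ads_mysql_paytype_day_sum_payment_price',
    'username' = 'root',
    'password' = '123456'
);

insert into gma_ads.ads_mysql_paytype_day_sum_payment_price
select
    payment_type,
    substr(payment_time,1,10) as day_id,
    sum(payment_price) as sum_payment_price
from gma_dwd.dwd_kafka_payment_info
group by payment_type,substr(payment_time,1,10);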
2. Order Count
- Number of orders per province per day, in real time
- Number of orders per region per day, in real time
- Number of orders per brand per day, in real time
- Number of orders per user per day, in real time (a sketch follows the statement set at the end of this section)
1. Number of orders per province per day, in real time

-- Create the MySQL sink table
CREATE TABLE gma_ads.ads_mysql_proc_day_order_num (
    pro_name STRING,
    day_id STRING,
    num bigint,
    PRIMARY KEY (pro_name,day_id) NOT ENFORCED -- rows are upserted by primary key
) WITH (
    'connector' = 'jdbc',
    'url' = 'jdbc:mysql://master:3306/gma_ads?useUnicode=true&characterEncoding=UTF-8',
    'table-name' = 'ads_mysql_proc_day_order_num', -- the table must be created manually in MySQL
    'username' = 'root',
    'password' = '123456'
);

-- Create the table in MySQL
CREATE TABLE `ads_mysql_proc_day_order_num` (
  `pro_name` varchar(255) NOT NULL,
  `day_id` varchar(255) NOT NULL,
  `num` bigint,
  PRIMARY KEY (`pro_name`,`day_id`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8;
2. Number of orders per region per day, in real time
-- Create the MySQL sink table
CREATE TABLE gma_ads.ads_mysql_region_day_order_num (
    region_name STRING,
    day_id STRING,
    num bigint,
    PRIMARY KEY (region_name,day_id) NOT ENFORCED -- rows are upserted by primary key
) WITH (
    'connector' = 'jdbc',
    'url' = 'jdbc:mysql://master:3306/gma_ads?useUnicode=true&characterEncoding=UTF-8',
    'table-name' = 'ads_mysql_region_day_order_num', -- the table must be created manually in MySQL
    'username' = 'root',
    'password' = '123456'
);

-- Create the table in MySQL
CREATE TABLE `ads_mysql_region_day_order_num` (
  `region_name` varchar(255) NOT NULL,
  `day_id` varchar(255) NOT NULL,
  `num` bigint,
  PRIMARY KEY (`region_name`,`day_id`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8;
3. Number of orders per brand per day, in real time
-- Load the Hive module so Hive built-in functions (split, explode) can be used in Flink SQL
LOAD MODULE hive WITH ('hive-version' = '1.2.1');

-- Explode the comma-separated skus column into one row per sku_id,
-- then look up the brand name from the item dimension table
select c.info.tm_name as tm_name,day_id,count(1) as num from (
    select
        sku_id,
        DATE_FORMAT(create_time,'yyyy-MM-dd') as day_id,
        PROCTIME() as proc_time
    from
        gma_dwd.dwd_kafka_order_info as a,
        LATERAL TABLE(explode(split(skus,','))) t(sku_id)
) as b
left join
gma_dim.dim_hbase_item_info FOR SYSTEM_TIME AS OF b.proc_time as c
on cast(b.sku_id as bigint)=c.sku_id
group by c.info.tm_name,day_id;
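The query above only selects; to persist the result, a JDBC sink in the same style would be needed. A minimal sketch (the table name ads_mysql_tm_day_order_num is hypothetical):

-- Hedged sketch: JDBC sink for the per-brand counts; the table name is
-- hypothetical, and the matching MySQL table must be created manually as above.
CREATE TABLE gma_ads.ads_mysql_tm_day_order_num (
    tm_name STRING,
    day_id STRING,
    num bigint,
    PRIMARY KEY (tm_name,day_id) NOT ENFORCED -- rows are upserted by primary key
) WITH (
    'connector' = 'jdbc',
    'url' = 'jdbc:mysql://master:3306/gma_ads?useUnicode=true&characterEncoding=UTF-8',
    'table-name' = 'ads_mysql_tm_day_order_num',
    'username' = 'root',
    'password' = '123456'
);

Prepending "insert into gma_ads.ads_mysql_tm_day_order_num" to the query above would then write the brand counts continuously.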
Executing multiple SQL statements in one job: an EXECUTE STATEMENT SET ... END block bundles several INSERT statements into a single Flink job, so the shared Kafka source is read only once.
EXECUTE STATEMENT SET
BEGIN
    -- Number of orders per province per day, in real time
    insert into gma_ads.ads_mysql_proc_day_order_num
    select
        b.info.pro_name,
        DATE_FORMAT(create_time,'yyyy-MM-dd') as day_id,
        count(1) as num
    from
        gma_dwd.dwd_kafka_order_info as a
    left join
        gma_dim.dim_hbase_region FOR SYSTEM_TIME AS OF a.proc_time as b
    on a.province_id=b.pro_id
    group by b.info.pro_name,DATE_FORMAT(create_time,'yyyy-MM-dd');

    -- Number of orders per region per day, in real time
    insert into gma_ads.ads_mysql_region_day_order_num
    select
        b.info.region_name,
        DATE_FORMAT(create_time,'yyyy-MM-dd') as day_id,
        count(1) as num
    from
        gma_dwd.dwd_kafka_order_info as a
    left join
        gma_dim.dim_hbase_region FOR SYSTEM_TIME AS OF a.proc_time as b
    on a.province_id=b.pro_id
    group by b.info.region_name,DATE_FORMAT(create_time,'yyyy-MM-dd');

END;
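The per-user order count from the list above is not implemented in this section; a minimal sketch following the same pattern, assuming dwd_kafka_order_info exposes a user_id column (the MySQL table must again be created by hand):

-- Hedged sketch: daily order count per user.
-- Assumes a user_id column on dwd_kafka_order_info; the MySQL table
-- ads_mysql_user_day_order_num must be created manually, as above.
CREATE TABLE gma_ads.ads_mysql_user_day_order_num (
    user_id BIGINT,
    day_id STRING,
    num bigint,
    PRIMARY KEY (user_id,day_id) NOT ENFORCED -- rows are upserted by primary key
) WITH (
    'connector' = 'jdbc',
    'url' = 'jdbc:mysql://master:3306/gma_ads?useUnicode=true&characterEncoding=UTF-8',
    'table-name' = 'ads_mysql_user_day_order_num',
    'username' = 'root',
    'password' = '123456'
);

insert into gma_ads.ads_mysql_user_day_order_num
select
    user_id,
    DATE_FORMAT(create_time,'yyyy-MM-dd') as day_id,
    count(1) as num
from gma_dwd.dwd_kafka_order_info
group by user_id,DATE_FORMAT(create_time,'yyyy-MM-dd');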
2. Issues
1. Data loss when Flink batch-writes updated rows to HBase (BUG)
- Fix: switch from batched writes to per-record writes (see the sketch below).
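One way to get per-record writes without custom code is through the Flink SQL HBase connector's sink buffering options. A minimal sketch, modeled on the dim_hbase_region table used above; the connector version ('hbase-1.4'), HBase table name, and zookeeper quorum are assumptions for this project's setup:

-- Hedged sketch: an HBase sink that flushes every row immediately instead of batching.
CREATE TABLE gma_dim.dim_hbase_region_sink (
    pro_id BIGINT,
    info ROW<pro_name STRING, region_name STRING>, -- 'info' maps to the HBase column family
    PRIMARY KEY (pro_id) NOT ENFORCED              -- pro_id becomes the HBase rowkey
) WITH (
    'connector' = 'hbase-1.4',
    'table-name' = 'dim_region',         -- assumed HBase table name
    'zookeeper.quorum' = 'master:2181',  -- assumed quorum address
    'sink.buffer-flush.max-rows' = '1',  -- flush after every single row
    'sink.buffer-flush.interval' = '0'   -- disable time-based batching
);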