今日总结9.23
2、数据清洗:
要求将day_id一列中的数值清洗为真实的日期格式,可用字符串表示。数据1对应日期2021-09-01,依次类推,15对应日期2021-09-15
//创建表格存放清洗后的数据: create table sales_day (day_id string, sale_nbr string, buy_nbr string, cnt string, round string) row format delimited fields terminated by ',';
//插入清洗后的数据
insert overwrite table sales_day select date_add('2023-09-00',cast(day_id as int)) as day_id,sale_nbr as sale_nbr,buy_nbr as buy_nbr,cnt as cnt,round as round from sale_add;
3、数据分析处理:
(1)统计每天各个机场的销售数量和销售金额。
要求的输出字段
day_id,sale_nbr,,cnt,round
日期编号,卖出方代码,数量,金额
create table sale_jich(day_id string, sale_nbr string, cnt string,round string) row format delimited fields terminated by ',';
insert into table sale_jich
select day_id,
sale_nbr,
sum(cnt) as cnt,
sum(round)
as round from sales_day group by sale_nbr,day_id having sale_nbr like 'C%';
(2)统计每天各个代理商的销售数量和销售金额。
要求的输出字段
day_id,sale_nbr,,cnt,round
日期编号,卖出方代码,数量,金额
create table day_sale(day_id string, sale_nbr string, cnt string, round string) row format delimited fields terminated by ',';
insert into table day_sale select day_id,sale_nbr,sum(cnt) as cnt,sum(round) as round from sales_day group by sale_nbr,day_id having sale_nbr like 'O%';
(3)统计每天各个代理商的销售活跃度。
要求的输出字段
day_id,sale_nbr, sale_number
日期编号,卖出方代码,交易次数(买入或者卖出均算交易次数)
//1)创建表存放每天代理商卖出的活跃度 create table daili_mc(day_id string,sale_nbr string,count int) row format delimited fields terminated by ','; insert into table daili_mc select day_id,sale_nbr,count(*) as count from sales_day where sale_nbr like 'O%' group by sale_nbr,day_id; //2、创建表存放每天代理商买入的活跃度 create table daili_mr(day_id string,sale_nbr string,count int) row format delimited fields terminated by ','; insert into table daili_mr select day_id,buy_nbr as sale_nbr,count(*) as count from sales_day where buy_nbr like 'O%' group by buy_nbr,day_id; //3、创建表统计每天代理商活跃度 create table daili_hy(day_id string,sale_nbr string,sale_number int) row format delimited fields terminated by ','; insert into table daili_hy select daili_mc.day_id as day_id,daili_mc.sale_nbr as sale_nbr,daili_mc.count+daili_mr.count as sale_number from daili_mc join daili_mr on (daili_mc.sale_nbr=daili_mr.sale_nbr) ;
(4)汇总统计9月1日到9月15日之间各个代理商的销售利润。
编号,卖出方代码,买入数量,买入金额,卖出数量,卖出金额,销售利润(卖出金额-买入金额)
//4、汇总统计 9 月 1 日到 9 月 15 日之间各个代理商的销售利润。 //1)创建表存放每天代理商卖出的数量和金额 create table daiout(day_id string,sale_nbr string,cnt int,round int) row format delimited fields terminated by ','; insert into table daiout select day_id,sale_nbr,sum(cnt) as cnt,sum(round) as round from sales_day where sale_nbr like 'O%' group by day_id,sale_nbr; //(2)创建表存放每天代理商买入的数量和金额 create table daiin(day_id string,buy_nbr string,cnt int,round int) row format delimited fields terminated by ','; insert into table daiin select day_id,buy_nbr,sum(cnt) as cnt,sum(round) as round from sales_day where buy_nbr like 'O%' group by day_id,buy_nbr; //(3)创建表存放每天代理商的销售利润 create table daili_ly(day_id string,sale_nbr string,incnt int,inround int,outcnt int,outround int,lirun int) row format delimited fields terminated by ','; insert into table daili_ly select daiout.day_id as day_id,daiout.sale_nbr as sale_nbr,daiin.cnt as incnt,daiin.round as inround,daiout.cnt as outcnt,daiout.round as outround,daiout.round-daiin.round as lirun from daiout join daiin on (daiin.buy_nbr=daiout.sale_nbr);
浙公网安备 33010602011771号