大数据测试
1、数据导入:
要求将样表文件中的(sales_sample_20170310)数据导入HIVE数据仓库中。
2、数据清洗:
要求将day_id一列中的数值清洗为真实的日期格式,可用字符串表示。
数据1对应日期2021-09-01,依次类推,15对应日期2021-09-15
3、数据分析处理:
(1)统计每天各个机场的销售数量和销售金额。
要求的输出字段
day_id,sale_nbr,,cnt,round
日期编号,卖出方代码,数量,金额
(2)统计每天各个代理商的销售数量和销售金额。
要求的输出字段
day_id,sale_nbr,,cnt,round
日期编号,卖出方代码,数量,金额
(3)统计每天各个代理商的销售活跃度。
要求的输出字段
day_id,sale_nbr, sale_number
日期编号,卖出方代码,交易次数(买入或者卖出均算交易次数)
(4)汇总统计9月1日到9月15日之间各个代理商的销售利润。
编号,卖出方代码,买入数量,买入金额,卖出数量,卖出金额,销售利润(卖出金额-买入金额)
(5)设计分析代理商的市场地位根据市场交易次数、交易对象个数、销售机票数量、销售利润等。(选做题)
4、处理结果入库:
将上述统计分析的结果数据保存到mySQL数据库中。
5、数据可视化展示:
利用Echarts将上述统计结果以图形化展示的方式展现出来:饼图、柱状图、地图、折线图等。
1 CREATE TABLE IF NOT EXISTS sales_sample ( 2 day_id STRING, 3 sale_nbr STRING, 4 buy_nbr STRING, 5 cnt INT, 6 round INT 7 ) 8 ROW FORMAT DELIMITED 9 FIELDS TERMINATED BY ',' 10 STORED AS TEXTFILE; 11 12 13 CREATE TABLE IF NOT EXISTS target_sales_sample ( 14 day_id STRING, 15 sale_nbr STRING, 16 buy_nbr STRING, 17 cnt INT, 18 round INT 19 ) 20 ROW FORMAT DELIMITED 21 FIELDS TERMINATED BY ',' 22 STORED AS TEXTFILE; 23 24 -- -- 将数据上传到HDFS 25 -- -- hadoop fs -put employees.csv / 26 27 -- -- 导入数据到Hive表 28 LOAD DATA INPATH '/xiyou/sales_sample_20170310.csv' INTO TABLE target_sales_sample; 29 -- 30 -- -- 验证数据导入 31 SELECT * FROM target_sales_sample ; 32 SELECT * FROM sales_sample ; 33 -- -- 使用Hive的UDF将day_id映射为日期格式 34 35 insert overwrite table sales_sample 36 select 37 date_add('2023-09-00',cast(day_id as int)) as day_id, 38 sale_nbr as sale_nbr, 39 buy_nbr as buy_nbr, 40 cnt as cnt, 41 round as round 42 from target_sales_sample; 43 44 -- drop table sales_sample; 45 -- drop table jichang; 46 47 -- (1)统计每天各个机场的销售数量和销售金额。 48 -- 要求的输出字段 49 -- day_id,sale_nbr,,cnt,round 50 -- 日期编号,卖出方代码,数量,金额 51 52 CREATE TABLE IF NOT EXISTS jichang ( 53 day_id STRING, 54 sale_nbr STRING, 55 cnt INT, 56 round INT 57 ) 58 ROW FORMAT DELIMITED 59 FIELDS TERMINATED BY ',' 60 STORED AS TEXTFILE; 61 62 insert into table jichang 63 select 64 day_id, 65 sale_nbr, 66 sum(cnt) as cnt, 67 sum(round) as round 68 from sales_sample 69 group by sale_nbr,day_id having sale_nbr like 'C%'; 70 71 SELECT * FROM jichang ; 72 73 -- (2)统计每天各个代理商的销售数量和销售金额。 74 -- 要求的输出字段 75 -- day_id,sale_nbr,,cnt,round 76 -- 日期编号,卖出方代码,数量,金额 77 CREATE TABLE IF NOT EXISTS dailishang ( 78 day_id STRING, 79 sale_nbr STRING, 80 cnt INT, 81 round INT 82 ) 83 ROW FORMAT DELIMITED 84 FIELDS TERMINATED BY ',' 85 STORED AS TEXTFILE; 86 87 insert into table dailishang 88 select 89 day_id, 90 sale_nbr, 91 sum(cnt) as cnt, 92 sum(round) as round 93 from sales_sample 94 group by sale_nbr,day_id having sale_nbr like 'O%'; 95 96 SELECT * FROM dailishang ; 97 98 -- (1)创建表存放每天代理商卖出的活跃度 99 CREATE TABLE IF NOT EXISTS dailishang_active ( 100 day_id STRING, 101 sale_nbr STRING, 102 sale_number INT 103 ) 104 ROW FORMAT DELIMITED 105 FIELDS TERMINATED BY ',' 106 STORED AS TEXTFILE; 107 108 insert into table dailishang_active 109 select 110 day_id, 111 sale_nbr, 112 count(*) as sale_number 113 from sales_sample 114 group by sale_nbr,day_id having sale_nbr like 'O%'; 115 116 SELECT * FROM dailishang_active ; 117 118 -- 2、创建表存放每天代理商买入的活跃度 119 CREATE TABLE IF NOT EXISTS dailishang_active2 ( 120 day_id STRING, 121 sale_nbr STRING, 122 sale_number INT 123 ) 124 ROW FORMAT DELIMITED 125 FIELDS TERMINATED BY ',' 126 STORED AS TEXTFILE; 127 128 insert into table dailishang_active2 129 select 130 day_id, 131 buy_nbr as sale_nbr, 132 count(*) as sale_number 133 from sales_sample 134 group by buy_nbr,day_id having buy_nbr like 'O%'; 135 136 SELECT * FROM dailishang_active2 ; 137 138 -- 3、创建表统计每天代理商活跃度 139 CREATE TABLE IF NOT EXISTS dailishang_huoyue ( 140 day_id STRING, 141 sale_nbr STRING, 142 sale_number INT 143 ) 144 ROW FORMAT DELIMITED 145 FIELDS TERMINATED BY ',' 146 STORED AS TEXTFILE; 147 148 insert into table dailishang_huoyue 149 select dailishang_active.day_id as day_id, 150 dailishang_active.sale_nbr as sale_nbr, 151 dailishang_active.sale_number+dailishang_active2.sale_number as sale_number 152 from dailishang_active join dailishang_active2 on (dailishang_active.sale_nbr=dailishang_active2.sale_nbr) ; 153 154 SELECT * FROM dailishang_huoyue; 155 156 -- (1)创建表存放每天代理商卖出的数量和金额 157 CREATE TABLE IF NOT EXISTS mai_chu ( 158 day_id STRING, 159 sale_nbr STRING, 160 cnt int, 161 round int 162 ) 163 ROW FORMAT DELIMITED 164 FIELDS TERMINATED BY ',' 165 STORED AS TEXTFILE; 166 167 insert into table mai_chu 168 select 169 day_id, 170 sale_nbr, 171 sum(cnt) as cnt, 172 sum(round) as round 173 from sales_sample 174 where sale_nbr like 'O%' 175 group by sale_nbr,day_id ; 176 177 SELECT * FROM mai_chu ; 178 179 -- (2)创建表存放每天代理商买入的数量和金额 180 CREATE TABLE IF NOT EXISTS mai_ru ( 181 day_id STRING, 182 buy_nbr STRING, 183 cnt int, 184 round int 185 ) 186 ROW FORMAT DELIMITED 187 FIELDS TERMINATED BY ',' 188 STORED AS TEXTFILE; 189 190 insert into table mai_ru 191 select 192 day_id, 193 buy_nbr, 194 sum(cnt) as cnt, 195 sum(round) as round 196 from sales_sample 197 where buy_nbr like 'O%' 198 group by buy_nbr,day_id ; 199 200 SELECT * FROM mai_ru ; 201 202 -- (3)创建表存放每天代理商的销售利润 203 CREATE TABLE IF NOT EXISTS dai_li ( 204 day_id STRING, 205 sale_nbr STRING, 206 incnt int, 207 inround int, 208 outcnt int, 209 outround int, 210 lirun int 211 ) 212 ROW FORMAT DELIMITED 213 FIELDS TERMINATED BY ',' 214 STORED AS TEXTFILE; 215 216 insert into table dai_li 217 select mai_chu.day_id as day_id, 218 mai_chu.sale_nbr as sale_nbr, 219 mai_ru.cnt as incnt, 220 mai_ru.round as inround, 221 mai_chu.cnt as outcnt, 222 mai_chu.round as outround, 223 mai_chu.round-mai_ru.round as lirun 224 from mai_chu join mai_ru on (mai_ru.buy_nbr=mai_chu.sale_nbr); 225 226 SELECT * FROM sales_sample; 227 SELECT * FROM jichang; 228 SELECT * FROM dailishang; 229 SELECT * FROM dailishang_huoyue; 230 SELECT * FROM dai_li;

浙公网安备 33010602011771号