hive数据分析

题目:

样表(sales_sample_20170310)字段说明:
day_id     日期编号;
sale_nbr    卖出方代码;
buy_nbr    买入方代码;
cnt        数量
round      金额
卖出方和买入方又分为3种类型:
以'C'开头的表示类型为C,代表“航空公司”,只可以卖出,不可以买入;
以'O'开头的表示类型为O,代表“代理人”,既可以卖出,也可以买入,并且允许自己卖给自己(简单来讲:每个“代理人”代码可能对应多个售票点,售票点之前有交换票的情况,所以体现为自己卖给了自己);
'PAX'表示类型为PAX,代表“旅客”,只可以买入,不可以卖出。

先正常启动hive

 

准备数据

create table sales_sample_20170310
(
    `day_id`   string comment '时间',
    `sale_nbr` string comment '卖出方',
    `buy_nbr`  string comment '买入方',
    `cnt`      int comment '数量',
    `round`    int comment '金额'
)
    row format delimited
        fields terminated by ','
        lines terminated by '\n';
load data inpath '/hivedata/sales_sample_20170310.csv' into table sales_sample_20170310;

inpath之后为文件在hdfs中的位置

验证:

select *
from sales_sample_20170310
limit 10000;

 

简单清洗:

insert overwrite table sales_sample_20170310
select concat('2022-10-', day_id), sale_nbr, buy_nbr, cnt, round
from sales_sample_20170310;


create table sales_sample111 as
select to_date(from_unixtime(UNIX_TIMESTAMP(day_id, 'yyyy-MM-dd'))) as day_id,
       sale_nbr,
       buy_nbr,
       cnt,
       round
from sales_sample_20170310;


select *
from sales_sample111
limit 1000;

所需的四个表:

-- 1
CREATE TABLE sale_hangkong AS
SELECT day_id,
       sale_nbr,
       SUM(cnt)   AS cnt,
       SUM(round) AS round
FROM sales_sample111
WHERE sale_nbr LIKE 'C%'
GROUP BY day_id, sale_nbr;

select *
from sale_hangkong
limit 10000;

-- 2
create table day_sale as
SELECT day_id,
       sale_nbr,
       SUM(cnt)   AS cnt,
       SUM(round) AS rount
FROM sales_sample111
WHERE sale_nbr LIKE 'O%'
GROUP BY day_id,
         sale_nbr;

select *
from day_sale
limit 1000;

-- 3
create table huoyuedumaichu as
select day_id,
       sale_nbr,
       count(*) as sale_number
from sales_sample111
where sale_nbr like 'O%'
group by sale_nbr, day_id;



create table huoyuedumairu as
select day_id,
       buy_nbr,
       count(*) as sale_number
from sales_sample111
where buy_nbr like 'O%'
group by buy_nbr, day_id;

create table huoyuedu as
select a.day_id                      day_id,
       a.sale_nbr                    sale_nbr,
       a.sale_number + h.sale_number sale_number
from huoyuedumaichu a
         join huoyuedumairu h on a.day_id = h.day_id and a.sale_nbr = h.buy_nbr;


select *
from huoyuedu
limit 1000;

-- 4
--计算代理商买入数量金额
drop table mairu;
create table mairu as
select day_id,
       buy_nbr,
       sum(cnt)   as cnt,
       sum(round) as round
from sales_sample111
where buy_nbr like 'O%'
group by day_id, buy_nbr;

select *
from mairu
limit 1000;


create table maichu as
select day_id,
       sale_nbr,
       sum(cnt)   as cnt,
       sum(round) as round
from sales_sample111
where sale_nbr like 'O%'
group by day_id, sale_nbr;
select *
from maichu
limit 1000;


create table lirun as
select a.day_id          as day_id,
       b.sale_nbr        as nbr,
       a.cnt             as cnt_buy,
       a.round           as rount_buy,
       b.cnt             as cnt_sale,
       b.round           as round_sale,
       b.round - a.round as liren
from mairu a
         join maichu b on a.buy_nbr = b.sale_nbr and a.day_id = b.day_id
where a.day_id between '2022-10-01' and '2022-10-15';
select *
from lirun
limit 1000;

 

将数据通过sqoop传到mysql中:

第一个表类似

create table day_sale(
    day_id varchar(20) not null ,
    sale_nbr varchar(20),
    cnt_sum int ,
    round_sum int
);

bin/sqoop export --connect "mysql链接-到数据库?useUnicode=true&characterEncoding=utf-8" --username root --password 123456 --table day_sale --export-dir /user/hive/warehouse/db_msg.db/day_sale --fields-terminated-by "\001" --input-null-non-string '\\N'


create table huoyuedu(
    day_id varchar(50) ,
    sale_nbr varchar(20),
    sale_number int
);


bin/sqoop export --connect "mysql链接-到数据库?useUnicode=true&characterEncoding=utf-8" --username root --password 123456 --table huoyuedu --export-dir /user/hive/warehouse/db_msg.db/huoyuedu --fields-terminated-by "\001" --input-null-non-string '\\N'


create table lirun(
    day_id varchar(50) ,
    nbr varchar(20),
     cnt_buy int,
     rount_buy int,
     cnt_sale int,
     round_sale int,
     lirun int
);


bin/sqoop export --connect "mysql链接-到数据库?useUnicode=true&characterEncoding=utf-8" --username root --password 123456 --table lirun --export-dir /user/hive/warehouse/db_msg.db/lirun --fields-terminated-by "\001" --input-null-non-string '\\N'

 

 

最后进行springboot链接到虚拟机中的mysql与平常一样,就不再表述

 

posted @ 2023-09-23 12:33  花伤错零  阅读(50)  评论(0)    收藏  举报