|NO.Z.00030|——————————|BigDataEnd|——|Hadoop&Hive.V30|——|Hive.v30|Hive案例综合案例.v01|
一、需求描述:针对销售数据,完成统计:
### --- 需求描述:针对销售数据,完成统计:
~~~     按年统计销售额
~~~     销售金额在 10W 以上的订单
~~~     每年销售额的差值
~~~     年度订单金额前10位(年度、订单号、订单金额、排名)
~~~     季度订单金额前10位(年度、季度、订单id、订单金额、排名)
~~~     求所有交易日中订单金额最高的前10位
~~~     每年度销售额最大的交易日
~~~     年度最畅销的商品(即每年销售金额最大的商品)二、数据说明
| 日期表(dimdate) | ||
| dt | date | 日期 | 
| yearmonth | int | 年月 | 
| year | smallint | 年 | 
| month | tinyint | 月 | 
| day | tinyint | 日 | 
| week | tinyint | 周几 | 
| weeks | tinyint | 第几周 | 
| quat | tinyint | 季度 | 
| tendays | tinyint | 旬 | 
| halfmonth | tinyint | 半月 | 
| 订单表(sale) | ||
| orderid | string | 订单号 | 
| locationid | string | 交易位置 | 
| dt | date | 交易日期 | 
| 订单销售明细表(saledetail) | ||
| orderid | string | 订单号 | 
| rownum | int | 行号 | 
| itemid | string | 货品 | 
| num | int | 数量 | 
| price | double | 单价 | 
| amount | double | 金额 | 
二、实现
### --- 步骤一:创建表
~~~     将数据存放在ORC文件中
~~~     # createtable.hql
hive (tuning)>  drop database sale cascade;
hive (tuning)>  create database if not exists sale;
create table sale.dimdate_ori(
dt date,
yearmonth int,
year smallint,
month tinyint,
day tinyint,
week tinyint,
weeks tinyint,
quat tinyint,
tendays tinyint,
halfmonth tinyint
)
row format delimited
fields terminated by ",";hive (tuning)> create table sale.sale_ori(
orderid string,
locationid string,
dt date
)
row format delimited
fields terminated by ",";
hive (tuning)> create table sale.saledetail_ori(
orderid string,
rownum int,
goods string,
num int,
price double,
amount double
)
row format delimited
fields terminated by ",";hive (tuning)> create table sale.dimdate(
dt date,
    yearmonth int,
year smallint,
month tinyint,
day tinyint,
week tinyint,
weeks tinyint,
quat tinyint,
tendays tinyint,
halfmonth tinyint
) stored as orc;
hive (tuning)> create table sale.sale(
orderid string,
locationid string,
dt date
) stored as orc;hive (tuning)> create table sale.saledetail(
orderid string,
rownum int,
goods string,
num int,
price double,
amount double
)stored as orc;
[root@linux122 ~]# hive -f createtable.hql### --- 步骤二:导入数据
~~~     # 加载数据
hive (tuning)> use sale;
hive (sale)> load data local inpath "/home/hadoop/data/tbDate.dat" overwrite into table dimdate_ori;
hive (sale)> load data local inpath "/home/hadoop/data/tbSale.dat" overwrite into table sale_ori;
hive (sale)> load data local inpath "/home/hadoop/data/tbSaleDetail.dat" overwrite into table saledetail_ori;
~~~     # 导入数据
hive (sale)> insert into table dimdate select * from dimdate_ori;
hive (sale)> insert into table sale select * from sale_ori;
hive (sale)> insert into table saledetail select * from saledetail_ori;
[root@linux122 ~]# hive -f loaddata.hql### --- SQL实现
~~~     按年统计销售额
hive (sale)> SELECT year(B.dt) year, round(sum(A.amount)/10000, 2) amount
FROM saledetail A join sale B on A.orderid=B.orderid
group by year(B.dt);
~~输出参数
year    amount
2004    326.81
2005    1325.76
2006    1368.1
2007    1671.94
2008    1467.43
2009    632.37
2010    21.09
3274    0.17~~~     # 销售金额在 10W 以上的订单
hive (sale)> SELECT orderid, round(sum(amount), 2) amount
FROM saledetail
group by orderid
having sum(amount) > 100000;
~~输出参数
orderid amount
HMJSL00009024   119084.8
HMJSL00009958   159126.0~~~     # 每年销售额的差值
hive (sale)> SELECT year, round(amount, 2) amount, round(lag(amount) over
(ORDER BY year), 2) prioramount
,round(amount - lag(amount) over (ORDER BY year), 2) diff
from (SELECT year(B.dt) year, sum(A.amount) amount
from saledetail A join sale B on A.orderid=B.orderid
group by year(B.dt)
) tmp;
~~输出参数
year    amount  prioramount diff
2004    3268115.5   NULL    NULL
2005    1.325756415E7   3268115.5   9989448.65
2006    1.36809829E7    1.325756415E7   423418.75
2007    1.671935456E7   1.36809829E7    3038371.66
2008    1.46742953E7    1.671935456E7   -2045059.26
2009    6323697.19  1.46742953E7    -8350598.11
2010    210949.66   6323697.19  -6112747.53
3274    1703.0  210949.66   -209246.66### --- 年度订单金额前10位(年度、订单号、订单金额、排名)
~~~     # 方法一
hive (sale)> SELECT dt, orderid, amount, rank
from (SELECT dt, orderid, amount,
dense_rank() over(PARTITION BY dt ORDER BY
amount desc) rank
from (SELECT year(B.dt) dt, A.orderid, sum(A.amount)
amount
from saledetail A join sale B on
A.orderid=B.orderid
GROUP BY year(B.dt), A.orderid
) tmp1
) tmp2
where rank <= 10;
~~输出参数
dt  orderid amount  rank
2004    HMJSL00001557   23656.79999999997   1
2004    HMJSL00001556   22010.599999999984  2
2004    HMJSL00001349   17147.2 3
2004    HMJSL00001531   16605.6 4
2004    HMJSL00001567   15651.0 5
2004    HMJSL00001562   15429.999999999989  6
2004    HMJSL00000706   15266.0 7
2004    HMJSL00000656   15021.319999999998  8
2004    HMJSL00001568   14444.0 9
2004    HMJSL00000963   14268.800000000001  10~~~     # 方法二
hive (sale)> with tmp as (
SELECT year(B.dt) dt, A.orderid, sum(A.amount) amount
from saledetail A join sale B on A.orderid=B.orderid
GROUP BY year(B.dt), A.orderid
)
SELECT dt, orderid, amount, rank
from (SELECT dt, orderid, amount,
dense_rank() over(PARTITION BY dt ORDER BY
amount desc) rank
from tmp
) tmp2
where rank <= 10;
~~输出参数
dt  orderid amount  rank
2004    HMJSL00001557   23656.79999999997   1
2004    HMJSL00001556   22010.599999999984  2
2004    HMJSL00001349   17147.2 3
2004    HMJSL00001531   16605.6 4
2004    HMJSL00001567   15651.0 5
2004    HMJSL00001562   15429.999999999989  6
2004    HMJSL00000706   15266.0 7
2004    HMJSL00000656   15021.319999999998  8
2004    HMJSL00001568   14444.0 9### --- 季度订单金额前10位(年度、季度、订单id、订单金额、排名)
~~~     # 方法一
hive (sale)> with tmp as (
select C.year, C.quat, A.orderid, round(sum(B.amount), 2)
amount
from sale A join saledetail B on A.orderid=B.orderid
join dimdate C on A.dt=C.dt
group by C.year, C.quat, A.orderid
)
select year, quat, orderid, amount, rank
from (
select year, quat, orderid, amount,
dense_rank() over (partition by year, quat order
by amount desc) rank
from tmp
) tmp1
where rank <= 10;
~~输出参数
year    quat    orderid amount  rank
2004    1   HMJSL00002525   3113.84 1
2004    3   HMJSL00000609   10730.0 1
2004    3   HMJSL00000600   9031.2  2
2004    3   HMJSL00000606   8880.8  3
2004    3   HMJSL00000571   8570.16 4
2004    3   HMJSL00000610   8094.0  5
2004    3   HMJSL00000574   7540.1  6
2004    3   HMJSL00000561   6981.6  7
2004    3   HMJSL00000615   6946.0  8
2004    3   HMJSL00000616   6509.0  9
2004    3   HMJSL00000627   6164.8  10~~~     # 方法二
hive (sale)> with tmp as(
select year(A.dt) year,
case when month(A.dt) <= 3 then 1
when month(A.dt) <= 6 then 2
when month(A.dt) <= 9 then 3
A.orderid,
round(sum(B.amount), 2) amount
from sale A join saledetail B on A.orderid = B.orderid
group by year(A.dt),
case when month(A.dt) <= 3 then 1
when month(A.dt) <= 6 then 2
when month(A.dt) <= 9 then 3
else 4 end,
A.orderid
)
select year, quat, orderid, amount, rank
from (
select year, quat, orderid, amount,
dense_rank() over (partition by year, quat order
by amount desc) rank
from tmp
) tmp1
where rank <= 10;
~~输出参数~~~     # 方法三:求季度
hive (sale)> select floor(month(dt/3.1)) + 1;
hive (sale)> with tmp as (
select year(A.dt) year, floor(month(A.dt)/3.1) + 1 quat,
A.orderid,
round(sum(B.amount), 2) amount
from sale A join saledetail B on A.orderid=B.orderid
group by year(A.dt), floor(month(A.dt)/3.1) + 1, A.orderid
)
select year, quat, orderid, amount, rank
from (
select year, quat, orderid, amount,
dense_rank() over (partition by year, quat order
by amount desc) rank
from tmp
) tmp1
where rank <= 10;
~~输出参数
year    quat    orderid amount  rank
2004    1   HMJSL00002525   3113.84 1
2004    3   HMJSL00000609   10730.0 1
2004    3   HMJSL00000600   9031.2  2
2004    3   HMJSL00000606   8880.8  3
2004    3   HMJSL00000571   8570.16 4
2004    3   HMJSL00000610   8094.0  5
2004    3   HMJSL00000574   7540.1  6
2004    3   HMJSL00000561   6981.6  7
2004    3   HMJSL00000615   6946.0  8
2004    3   HMJSL00000616   6509.0  9
2004    3   HMJSL00000627   6164.8  10### --- 求所有交易日中订单金额最高的前10位
~~~     topN问题:
~~~     基础数据
~~~     上排名函数
~~~     解决N的问题
hive (sale)> with tmp as (
select A.dt, A.orderid, round(sum(B.amount), 2) amount
from sale A join saledetail B on A.orderid=B.orderid
group by A.dt, A.orderid
)
select dt, orderid, amount, rank
from (
select dt, orderid, amount, dense_rank() over(order by
amount desc) rank
from tmp
) tmp1
where rank <= 10;
~~输出参数
dt  orderid amount  rank
2007-07-31  HMJSL00009958   159126.0    1
2007-04-24  HMJSL00009024   119084.8    2
2008-01-15  HMJSL00010598   55828.0 3
2007-07-31  HMJSL00009957   52422.0 4
2007-10-09  HMJSL00010216   49800.0 5
2007-07-31  HMJSL00009956   43018.0 6
2007-11-04  HMJSL00010339   42157.6 7
2007-02-07  HMJSL00008593   41902.4 8
2007-09-27  HMJSL00010137   40340.0 9
2005-05-31  HMJSL00003263   38186.4 10### --- 每年度销售额最大的交易日
hive (sale)> with tmp as (
select A.dt, round(sum(B.amount), 2) amount
from sale A join saledetail B on A.orderid=B.orderid
group by A.dt
)
select year(dt) year, max(amount) dayamount
from tmp
group by year(dt);
~~输出参数
year    dayamount
2004    160121.08
2005    189984.81
2006    232302.62
2007    309427.1
2008    151134.0
2009    71127.0
2010    28161.8
3274    1703.0~~~     # 备注:以上求解忽略了交易日,以下SQL更符合题意
hive (sale)> with tmp as (
select dt, amount, dense_rank() over (partition by year(dt)
order by amount desc) as rank
from (select A.dt, round(sum(B.amount), 2) amount
from sale A join saledetail B on A.orderid=B.orderid
group by A.dt) tab1
)
select year(dt) as year, dt, amount
from tmp
where rank=1;
~~输出参数
year    dt  amount
2004    2004-11-01  160121.08
2005    2005-10-03  189984.81
2006    2006-04-29  232302.62
2007    2007-07-31  309427.1
2008    2008-01-20  151134.0
2009    2009-01-24  71127.0
2010    2010-01-01  28161.8
3274    3274-12-12  1703.0### --- 年度最畅销的商品(即每年销售金额最大的商品)
hive (sale)> with tmp as (
select year(B.dt) year, goods, round(sum(amount),2) amount
from saledetail A join sale B on A.orderid=B.orderid
group by year(B.dt), goods
)
select year, goods, amount
from (select year, goods, amount, dense_rank() over
(partition by year order by amount desc) rank
from tmp) tmp1
where rank = 1;
~~输出参数
year    goods   amount
2004    JY424420810101  53401.76
2005    24124118880102  56627.33
2006    JY425468460101  113720.6
2007    JY425468460101  70225.1
2008    E2628204040101  98003.6
2009    YL327439080102  30029.2
2010    SQ429425090101  4494.0
3274    YA217390232301  698.0Walter Savage Landor:strove with none,for none was worth my strife.Nature I loved and, next to Nature, Art:I warm'd both hands before the fire of life.It sinks, and I am ready to depart
                                                                                                                                                   ——W.S.Landor
 
                    
                     
                    
                 
                    
                 
                
            
         浙公网安备 33010602011771号
浙公网安备 33010602011771号 
