不分发数据,使用单个reducer

set mapred.reduce.tasks=1;

select * 
from dw.dw_app 
where 
dt>='2016-09-01' 
and dt <='2016-09-18' 
order by stime
limit 30000;

 

包多一层,是用order by

select t.* from 
(
select *
from dw.dw_app 
where 
dt>='2016-09-01' 
and dt <='2016-09-18' 
and app_id='16099'
and msgtype = 'role.recharge' 
) t
order by t.stime 
limit 5000;

 

 

 

把所有具有相同的行最终都在一个reducer分区中,在在一个reducer中排序。 cluster by column=distribute by column+sort by colum

select * 
from dw.dw_app 
where 
dt>='2016-09-01' 
and dt <='2016-09-18' 
and app_id='16099'
and msgtype = 'role.recharge' 
cluster by dt
limit 30000;

 

查询每天前十名充值用户和充值总额

select t3.*
  from (select t2.*
          from (select dt,
                       account_id,
                       sum(recharge_money) as total_money,
                       row_number() over(partition by dt order by sum(recharge_money) desc) rank
                  from (select dt, account_id, recharge_money
                          from dw.dw_app
                         where dt >= '2016-09-01'
                           and dt <= '2016-09-18'
                           and app_id = '16099'
                           and msgtype = 'role.recharge' 
                cluster by dt, account_id) t group by dt, account_id) t2
where t2.rank <= 10) t3 order by t3.dt asc, rank asc limit 300;

 

posted on 2016-10-09 11:12  凌度  阅读(3210)  评论(0编辑  收藏  举报