oracle 开发 第08章 分析数据

2016-02-02

目录

一、评级函数
  1.RANK()和DENSE_RANK()函数
  2.CUME_DIST()和PERCENT_RANK()函数
  3.NTILE()函数
  4.ROW_NUMBER()函数
  5.PERCENILE_DISC(x)和PERCENTILE_CONT(x)反百分位函数
二、窗口函数
  1.计算累积和
  2.计算移动平均值
  3.计算中心平均值
  4.用FIRST_VALUE()和LAST_VALUE()获取第一行和最后一行
  5.用NTH_VALUE()函数获取第n行
三、报表函数
  1.总计报表
  2.RATIO_TO_REPORT()函数
  3.LISTAGG()函数
  4.LAG()和LEAD()函数
  5.FIRST()和LAST()函数
四、线性回归函数
五、MODEL子句
  1.位置标记访问数据单元
  2.符号标记访问数据单元
  3.用between和and返回特定范围的数据单元
  4.用any和is any 访问所有数据单元
  5.用currentv()函数获取某个维度当前值
  6.用for循环访问数据单元
  7.使用is present
  8.使用presentv()函数
  9.使用presentnnv()函数
  10.使用ignore nav和keep nav
  11.使用rules udpate更新已有单元
六、PIVOT和UNPIVOT子句
  1.pivot简单示例
  2.转换多列
  3.使用多个聚合函数
  4.使用UNPIVOT子句

 

一、评级函数
1.RANK()和DENSE_RANK()函数

--返回数据项在分组中的排名
select * from all_sales where rownum <= 12;

select prd_type_id,
       sum(amount),
       rank() over(order by sum(amount) desc) as rank,
       dense_rank() over(order by sum(amount) desc) as dense_rank
  from all_sales
 where year = 2003
   and amount is not NULL
 group by prd_type_id
 order by prd_type_id;

select prd_type_id,
       sum(amount),
       rank() over(order by sum(amount) desc) as rank,
       dense_rank() over(order by sum(amount) desc) as dense_rank
  from all_sales
 where year = 2003
/*   and amount is not NULL*/
 group by prd_type_id
 order by prd_type_id;

--NULLS FIRST和NULLS LAST子句控制空值排名
select prd_type_id,
       sum(amount),
       rank() over(order by sum(amount) desc nulls last) as rank,
       dense_rank() over(order by sum(amount) desc nulls last) as dense_rank
  from all_sales
 where year = 2003
/*   and amount is not NULL*/
 group by prd_type_id
 order by prd_type_id;

--RANK()和PARTITION BY
select prd_type_id,
       month,
       sum(amount),
       rank() over(partition by month order by sum(amount) desc) as rank
  from all_sales
 where year = 2003
   and amount is not NULL
 group by prd_type_id, month
 order by prd_type_id, month;

--RANK()和ROLLUP()
select prd_type_id,
       sum(amount),
       rank() over(order by sum(amount) desc) as rank
  from all_sales
 where year = 2003
 group by rollup(prd_type_id)
 order by prd_type_id;
--RANK()和CUBE()
select prd_type_id,
       emp_id,
       sum(amount),
       rank() over(order by sum(amount) desc) as rank
  from all_sales
 where year = 2003
 group by cube(prd_type_id, emp_id)
 order by prd_type_id, emp_id;
 --RANK()和GROUPTING SETS
 select prd_type_id,
       emp_id,
       sum(amount),
       rank() over(order by sum(amount) desc) as rank
  from all_sales
 where year = 2003
 group by cube(prd_type_id, emp_id)
 order by prd_type_id, emp_id;

2.CUME_DIST()和PERCENT_RANK()函数

--CUME_DIST()计算某个特定值在一组值中的累积分布
--PERCENT_RANK计算某个值相对一组值的百分比排名
select prd_type_id,
       sum(amount),
       cume_dist() over(order by sum(amount) desc) as cume_dist,
       percent_rank() over(order by sum(amount) desc) as percent_rank
  from all_sales
 where year = 2003
 group by prd_type_id, emp_id
 order by prd_type_id, emp_id;

3.NTILE()函数

--计算n分片
select prd_type_id,
       sum(amount),
       ntile(4) over(order by sum(amount) desc) as ntile
  from all_sales
 where year = 2003
   and amount is not null
 group by prd_type_id
 order by prd_type_id;

4.ROW_NUMBER()函数

--从1开始,为分组中的每行返回一个数字
select prd_type_id,
       sum(amount),
       row_number() over(order by sum(amount) desc) as row_number
  from all_sales
 where year = 2003
   and amount is not null
 group by prd_type_id
 order by prd_type_id;

5.PERCENILE_DISC(x)和PERCENTILE_CONT(x)反百分位函数

select percentile_cont(0.6) within group(order by sum(amount) desc) as percentile_cont,
       percentile_disc(0.6) within group(order by sum(amount) desc) as percentile_disc
  from all_sales
 where year = 2003
 group by prd_type_id;

二、窗口函数
1.计算累积和

select month,
       sum(amount) as month_amount,
       sum(sum(amount)) over(order by month rows between unbounded preceding and current row) as cumulative_amount
  from all_sales
 where year = 2003
 group by month
 order by month;

select month,
       sum(amount) as month_amount,
       sum(sum(amount)) over(order by month rows between unbounded preceding and current row) as cumulative_amount
  from all_sales
 where year = 2003
 and month between 6 and 12
 group by month
 order by month;

2.计算移动平均值

select month,
       sum(amount) as month_amount,
       avg(sum(amount)) over(order by month rows between 3 preceding and current row) as moving_average
  from all_sales
 where year = 2003
 group by month
 order by month;

3.计算中心平均值

select month,
       sum(amount) as month_amount,
       avg(sum(amount)) over(order by month rows between 1 preceding and 1 following) as moving_average
  from all_sales
 where year = 2003
 group by month
 order by month;

4.用FIRST_VALUE()和LAST_VALUE()获取第一行和最后一行

select month,
       sum(amount) as month_amount,
       first_value(sum(amount)) over(order by month rows between 1 preceding and 1 following) as previous_month_amount,
       last_value(sum(amount)) over(order by month rows between 1 preceding and 1 following) as next_month_amount
  from all_sales
 where year = 2003
 group by month
 order by month;

5.用NTH_VALUE()函数获取第n行

select month,
       sum(amount) as month_amount,
       nth_value(sum(amount), 2) over(order by month rows between unbounded preceding and unbounded following) as nth_value
  from all_sales
 where year = 2003
 group by month
 order by month;

select prd_type_id,
       emp_id,
       max(amount),
       nth_value(max(amount), 4) over(partition by prd_type_id order by emp_id rows between unbounded preceding and unbounded following) as nth_value
  from all_sales
 where prd_type_id between 1 and 3
 group by prd_type_id, emp_id
 order by prd_type_id, emp_id;

三、报表函数
1.总计报表

select month,
       prd_type_id,
       sum(sum(amount)) over(partition by month) as total_month,
       sum(sum(amount)) over(partition by prd_type_id) as total_product_type_amount
  from all_sales
 where year = 2003
   and month <= 3
 group by month, prd_type_id
 order by month, prd_type_id;

2.RATIO_TO_REPORT()函数

--计算某个值在一组值的总和中所占的比率
select month,
       prd_type_id,
       sum(amount) as prd_type_amount,
       ratio_to_report(sum(amount)) over(partition by month) as prd_type_ratio
  from all_sales
 where year = 2003
   and month <= 3
 group by month, prd_type_id
 order by month, prd_type_id;

3.LISTAGG()函数

--对分组内的各行排序并将分组的值集连接起来
select * from products order by product_id;

select listagg(name, ', ') within group(order by price, name) as "Product List",
       max(price) as "Most Expensive"
  from products
 where product_id <= 5;

select product_id,
       product_type_id,
       name,
       listagg(name, ', ') within group(order by name) over(partition by product_type_id) as "Product List"
  from products
 where product_id <= 5
 order by product_id, product_type_id;

4.LAG()和LEAD()函数

--获得位于距当前行指定距离的那一行数据
select month,
       sum(amount) as month_amount,
       lag(sum(amount), 1) over(order by month) as previous_month_amount,
       lead(sum(amount), 1) over(order by month) as next_month_amount
  from all_sales
 where year = 2003
 group by month
 order by month;

5.FIRST()和LAST()函数

--获取排序分组中第一个值和最后一个值
select min(month) keep (dense_rank first order by sum(amount)) as highest_sales_month,
       min(month) keep (dense_rank last order by sum(amount)) as lowest_sales_month
  from all_sales
 where year = 2003
 group by month
 order by month;

四、线性回归函数

select prd_type_id,
       regr_avgx(amount, month) as avgx,
       regr_avgy(amount, month) as avgy,
       regr_count(amount, month) as count,
       regr_intercept(amount, month) as inter,
       regr_r2(amount, month) as r2,
       regr_slope(amount, month) as slope,
       regr_sxx(amount, month) as sxx,
       regr_sxy(amount, month) as sxy,
       regr_syy(amount, month) as syy
  from all_sales
 where year = 2003
 group by prd_type_id
 order by prd_type_id;

select prd_type_id,
       sum(amount),
       rank() over(order by sum(amount) desc) as rank,
       percent_rank() over(order by sum(amount) desc) as percent_rank       
  from all_sales
 where year = 2003
   and amount is not null
 group by prd_type_id
 order by prd_type_id;

select rank(500000) within group(order by sum(amount) desc) as rank,
       percent_rank(500000) within group(order by sum(amount) desc) as percent_rank
  from all_sales
 where year = 2003
   and amount is not null
 group by prd_type_id
 order by prd_type_id;

五、MODEL子句
1.位置标记访问数据单元

select prd_type_id, year, month, sales_amount
  from all_sales
 where prd_type_id between 1 and 2
   and emp_id = 21 
   model 
   partition by(prd_type_id) 
   dimension by(month, year) 
   measures(amount sales_amount)
 (sales_amount [ 1, 2004 ] = sales_amount [ 1, 2003 ], 
  sales_amount [ 2, 2004 ] = sales_amount [ 2, 2003 ] + sales_amount [ 3, 2003 ],
  sales_amount [ 3, 2004 ] = round(sales_amount [ 3, 2003 ] * 1.25, 2))
 order by prd_type_id, year, month;

2.符号标记访问数据单元

select prd_type_id, year, month, sales_amount
  from all_sales
 where prd_type_id between 1 and 2
   and emp_id = 21 
   model 
   partition by(prd_type_id) 
   dimension by(month, year) 
   measures(amount sales_amount)
 (sales_amount [ month = 1, year = 2004 ] = sales_amount [ month = 1,year = 2003 ], 
 sales_amount [ month = 2, year = 2004] = sales_amount [ month = 2, year = 2003 ] + sales_amount [month = 3, year = 2003 ], 
 sales_amount [ month = 3, year = 2004] = round(sales_amount [ month = 3, year = 2003 ] * 1.25, 2))
 order by prd_type_id, year, month;

3.用between和and返回特定范围的数据单元

select prd_type_id, year, month, sales_amount
  from all_sales
 where prd_type_id between 1 and 2
   and emp_id = 21 
   model 
   partition by(prd_type_id) 
   dimension by(month, year) 
   measures(amount sales_amount)
 (sales_amount[ 1, 2004] = round(avg(sales_amount) [ month between 1 and 3 ,2003], 2))
 order by prd_type_id, year, month;

4.用any和is any 访问所有数据单元

select prd_type_id, year, month, sales_amount
  from all_sales
 where prd_type_id between 1 and 2
   and emp_id = 21 
   model 
   partition by(prd_type_id) 
   dimension by(month, year) 
   measures(amount sales_amount)
 (sales_amount[ 1, 2004] = round(sum(sales_amount) [ any, year is any], 2))
 order by prd_type_id, year, month;

5.用currentv()函数获取某个维度当前值

select prd_type_id, year, month, sales_amount
  from all_sales
 where prd_type_id between 1 and 2
   and emp_id = 21 
   model 
   partition by(prd_type_id) 
   dimension by(month, year) 
   measures(amount sales_amount)
 (sales_amount[ 1, 2004] = round(sales_amount[ currentv(),2003] * 1.25, 2))
 order by prd_type_id, year, month;

6.用for循环访问数据单元

select prd_type_id, year, month, sales_amount
  from all_sales
 where prd_type_id between 1 and 2
   and emp_id = 21 
   model 
   partition by(prd_type_id) 
   dimension by(month, year) 
   measures(amount sales_amount)
 (sales_amount[ for month from 1 to 3 increment 1,2004] = round(sales_amount[currentv(),2003] * 1.25,2))
 order by prd_type_id, year, month;

7.使用is present

--如果cell指定的行在model子句执行之前存在,那么is present返回true
select prd_type_id, year, month, sales_amount
  from all_sales
 where prd_type_id between 1 and 2
   and emp_id = 21 
   model 
   partition by(prd_type_id) 
   dimension by(month, year) 
   measures(amount sales_amount)
 (sales_amount [ for month from 1 to 3 increment 1, 2004 ] = 
   case when sales_amount [ currentv(), 2003 ] is present 
     then
           round(sales_amount [ currentv(), 2003 ] * 1.25, 2)
      else
           0
      end)
 order by prd_type_id, year, month;

8.使用presentv()函数

--如果cell引用的行在model子句执行前存在,那么presentv(cell,expr1,expr2)返回表达式expr1;如果不存在,返回expr2
select prd_type_id, year, month, sales_amount
  from all_sales
 where prd_type_id between 1 and 2
   and emp_id = 21 
   model 
   partition by(prd_type_id) 
   dimension by(month, year) 
   measures(amount sales_amount)
 (sales_amount [ for month from 1 to 3 increment 1, 2004 ] = 
   presentv(sales_amount[currentv(),2003],round(sales_amount[currentv(),2003] * 1.25,2),0)
   )
 order by prd_type_id, year, month;

9.使用presentnnv()函数

--如果cell引用的行在model子句执行前存在,并且该单元值不为空,那么presentv(cell,expr1,expr2)返回表达式expr1;如果行不存在或单元值为空,返回expr2
select prd_type_id, year, month, sales_amount
  from all_sales
 where prd_type_id between 1 and 2
   and emp_id = 21 
   model 
   partition by(prd_type_id) 
   dimension by(month, year) 
   measures(amount sales_amount)
 (sales_amount [ for month from 1 to 3 increment 1, 2004 ] = 
   presentnnv(sales_amount[currentv(),2003],round(sales_amount[currentv(),2003] * 1.25,2),0)
   )
 order by prd_type_id, year, month;

10.使用ignore nav和keep nav

select prd_type_id, year, month, sales_amount
  from all_sales
 where prd_type_id between 1 and 2
   and emp_id = 21 
   model ignore nav
   partition by(prd_type_id) 
   dimension by(month, year) 
   measures(amount sales_amount)
 (sales_amount [ for month from 1 to 3 increment 1, 2004 ] = 
   round(sales_amount[currentv(),2003] * 1.25,2)
   )
 order by prd_type_id, year, month;

11.使用rules udpate更新已有单元

--在单元不存在的情况下不创建新行
select prd_type_id, year, month, sales_amount
  from all_sales
 where prd_type_id between 1 and 2
   and emp_id = 21 
   model
   partition by(prd_type_id) 
   dimension by(month, year) 
   measures(amount sales_amount)
   rules update (sales_amount [ for month from 1 to 3 increment 1, 2004 ] = 
   round(sales_amount[currentv(),2003] * 1.25,2)
   )
 order by prd_type_id, year, month;

六、PIVOT和UNPIVOT子句
1.pivot简单示例

select *
  from (select month, prd_type_id, amount
          from all_sales
         where year = 2003
           and prd_type_id in (1, 2, 3));

select *
  from (select month, prd_type_id, amount
          from all_sales
         where year = 2003
           and prd_type_id in (1, 2, 3)) 
 pivot(sum(amount) for month in(1 as jan,
                                2 as feb,
                                3 as mar,
                                4 as apr))
 order by prd_type_id;

2.转换多列

SELECT 
  * 
FROM
  (SELECT 
    MONTH,
    prd_type_id,
    amount 
  FROM
    all_sales 
  WHERE YEAR = 2003 
    AND prd_type_id IN (1, 2, 3)) pivot (
    SUM(amount) FOR (MONTH, prd_type_id) IN (
      (1, 2) AS jan_prdtype2,
      (2, 3) AS feb_prdtype3,
      (3, 1) AS mar_prdtype1,
      (4, 2) AS apr_prdtype2
    )
  ) ;

3.使用多个聚合函数

SELECT 
  * 
FROM
  (SELECT 
    MONTH,
    prd_type_id,
    amount 
  FROM
    all_sales 
  WHERE YEAR = 2003 
    AND prd_type_id IN (1, 2, 3)
    ) 
   PIVOT (
    SUM(amount) AS sum_amount,
    AVG(amount) AS avg_amount
    FOR (month) IN (1 AS JAN,2 AS FEB
    )
  )
  ORDER BY prd_type_id ;

4.使用UNPIVOT子句

SELECT *
FROM pivot_sales_data
UNPIVOT (
 amount FOR month IN (JAN,FEB,MAR,APR)
 )
 ORDER BY prd_type_id;

 

【参考资料】

[1] Jason Price.精通Oracle Database 12c SQL&PLSQL编程(第3版).[M].北京:清华大学出版社,2014

posted @ 2016-01-12 16:14  岑亮  阅读(384)  评论(0)    收藏  举报