# 简单，可复制

 :: 首页 :: 博问 :: 闪存 :: 新随笔 :: 联系 :: 订阅 :: 管理 ::

## 解决过程

create table score_table (
subject        string,
student       string,
score           int
)
partitioned by (date string)

create temporary function row_number as 'com.blue.hive.udf.RowNumber';
select subject,score,student from
(select subject,score,student from score where dt='2012'  order by subject,socre desc) order_score
where row_number(subject) <= 100;

com.blue.hive.udf.RowNumber是自定义函数，函数的作用是按指定的列进行分组生成行序列。这里根据每个科目的所有成绩，生成序列，序列值从1开始自增。

物理  80 张三

物理  100 张一

科目  成绩 学生   row_number

.....

....

create temporary function row_number as 'com.blue.hive.udf.RowNumber';
explain select subject,score,student from
(select subject,score,student from score where dt='2012'  order by subject,socre desc) order_score
where row_number(subject) <= 100;

explain不会执行mapreduce计算，只会显示执行计划。

sql如下：

create temporary function row_number as 'com.blue.hive.udf.RowNumber';
select subject,score,student from
(select subject,score,student from score where dt='2012'  distribute by subject sort by subject asc, socre desc) order_score
where row_number(subject) <= 100;

create temporary function row_number as 'com.blue.hive.udf.RowNumber';
explain select college,subject,score,student from
(select college,subject,score,student from score where dt='2012'  order by college asc,subject asc,socre desc) order_score
where row_number(college,subject) <= 100;

create temporary function row_number as 'com.blue.hive.udf.RowNumber';
explain select college,totalscore,student from
(select college,student,sum(score) as totalscore from score where dt='2012'  group by college,student  order by college asc,totalscore desc) order_score
where row_number(college) <= 100;

## row_number的源码

package com.blue.hive.udf;

public class RowNumber extends UDF {

private static int MAX_VALUE = 50;
private static String comparedColumn[] = new String[MAX_VALUE];
private static int rowNum = 1;

public int evaluate(Object... args) {
String columnValue[] = new String[args.length];
for (int i = 0; i < args.length; i++) 『
columnValue[i] = args[i].toString();
}
if (rowNum == 1) {
for (int i = 0; i < columnValue.length; i++)
comparedColumn[i] = columnValue[i];
}

for (int i = 0; i < columnValue.length; i++) {
if (!comparedColumn[i].equals(columnValue[i])) {
for (int j = 0; j < columnValue.length; j++) {
comparedColumn[j] = columnValue[j];
}
rowNum = 1;
return rowNum++;
}
}
return rowNum++;
}
}

add jar /usr/local/hive/udf/blueudf.jar;
create temporary function row_number as 'com.blue.hive.udf.RowNumber';
select subject,score,student from
(select subject,score,student from score where dt='2012'  order by subject,socre desc) order_score
where row_number(subject) <= 100;

hive 0.12之前可用，0.12之后不可用，只能用窗口函数替代。

posted on 2013-01-30 17:10  ggjucheng  阅读(20246)  评论(3编辑  收藏  举报