MapReduce 中的Map后,sort不能对中文的key排序

今天写了一个用mapreduce求平均分的程序,结果是出来了,可是没有按照“学生名字”进行排序,如果是英文名字的话,结果是排好序的。

代码如下:

package com.pro.bq;

import java.io.IOException;
import java.util.StringTokenizer;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;
import org.apache.hadoop.fs.Path;

public class AverageScore {
    public static class MapAvg extends Mapper<Object, Text, Text, IntWritable>
    {

        public void map(Object key, Text value,Context context)
                throws IOException, InterruptedException {  
//            String[] lineData=value.toString().split(" ");//split中间如果有很多“ ”的话lineData的长度增加,灵活性差
//            if(lineData.length==2)
//            {        
//                name.set(lineData[0]);
//                score.set(Integer.parseInt(lineData[1]));
//                context.write(name,score);
//            } String line
=value.toString(); StringTokenizer tokenizer=new StringTokenizer(line,"\n"); while(tokenizer.hasMoreElements()) { StringTokenizer token=new StringTokenizer(tokenizer.nextToken()); Text name=new Text(token.nextToken()); IntWritable score=new IntWritable(Integer.parseInt(token.nextToken())); context.write(name,score); } } } public static class ReduceAvg extends Reducer<Text, IntWritable, Text, IntWritable> { public void reduce(Text key, Iterable<IntWritable> values,Context context) throws IOException, InterruptedException { // TODO Auto-generated method stub int sum=0; int cnt=0; for(IntWritable val:values) { sum+=val.get(); cnt++; } sum=(Integer)sum/cnt; context.write(key, new IntWritable(sum)); } } public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException { Configuration conf=new Configuration(); String[] hdfsPath=new String[]{"hdfs://localhost:9000/user/haduser/input/averageTest/","hdfs://localhost:9000/user/haduser/output/outAvgScore/"}; String[] otherArgs=new GenericOptionsParser(conf, hdfsPath).getRemainingArgs(); if(otherArgs.length!=2) { System.err.println("<in> <out>!!"); System.exit(2); } Job job=new Job(); job.setJarByClass(AverageScore.class); job.setMapperClass(MapAvg.class); job.setReducerClass(ReduceAvg.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(IntWritable.class); FileInputFormat.addInputPath(job, new Path(otherArgs[0])); FileOutputFormat.setOutputPath(job,new Path(otherArgs[1])); System.exit(job.waitForCompletion(true)?0:1); } }
file1:
zhangsan 33
lisi 44
wangwu 55
zhaoliu 66

file2:
张三    4
李四    1
王五    2
赵六    3

file3:
zhangsan 22
lisi 33
wangwu 44
zhaoliu 55

file4:
李四 2
张三 1
王五 3
赵六 4

结果如下(注意:reduce 中使用的是整数除法,平均分被向下取整,例如 lisi 的 (44+33)/2=38.5 输出为 38):

lisi    38
wangwu    49
zhangsan    27
zhaoliu    60
张三    2
李四    1
王五    2
赵六    3

难道不支持中文的排序?其实输出并非没有排序:Hadoop 对 Text 键按 UTF-8 字节序排序,这等价于按 Unicode 码点排序——张(U+5F20)< 李(U+674E)< 王(U+738B)< 赵(U+8D75),正是上面结果的顺序,只是它不等于拼音顺序。要按拼音排序,需要通过 job.setSortComparatorClass() 提供一个自定义的 RawComparator(例如基于 java.text.Collator 的中文排序器),必要时再配合自定义 Partitioner。以后解决...

posted @ 2014-02-20 22:25  很厉害的名字  阅读(1100)  评论(0编辑  收藏  举报