MapReduce 中的Map后,sort不能对中文的key排序

今天写了一个用mapreduce求平均分的程序,结果是出来了,可是没有按照“学生名字”进行排序,如果是英文名字的话,结果是排好序的。

代码如下:

package com.pro.bq;

import java.io.IOException;
import java.util.StringTokenizer;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;
import org.apache.hadoop.fs.Path;

public class AverageScore {
    public static class MapAvg extends Mapper<Object, Text, Text, IntWritable>
    {

        public void map(Object key, Text value,Context context)
                throws IOException, InterruptedException {  
//            String[] lineData=value.toString().split(" ");//split中间如果有很多“ ”的话lineData的长度增加,灵活性差
//            if(lineData.length==2)
//            {        
//                name.set(lineData[0]);
//                score.set(Integer.parseInt(lineData[1]));
//                context.write(name,score);
//            } String line
=value.toString(); StringTokenizer tokenizer=new StringTokenizer(line,"\n"); while(tokenizer.hasMoreElements()) { StringTokenizer token=new StringTokenizer(tokenizer.nextToken()); Text name=new Text(token.nextToken()); IntWritable score=new IntWritable(Integer.parseInt(token.nextToken())); context.write(name,score); } } } public static class ReduceAvg extends Reducer<Text, IntWritable, Text, IntWritable> { public void reduce(Text key, Iterable<IntWritable> values,Context context) throws IOException, InterruptedException { // TODO Auto-generated method stub int sum=0; int cnt=0; for(IntWritable val:values) { sum+=val.get(); cnt++; } sum=(Integer)sum/cnt; context.write(key, new IntWritable(sum)); } } public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException { Configuration conf=new Configuration(); String[] hdfsPath=new String[]{"hdfs://localhost:9000/user/haduser/input/averageTest/","hdfs://localhost:9000/user/haduser/output/outAvgScore/"}; String[] otherArgs=new GenericOptionsParser(conf, hdfsPath).getRemainingArgs(); if(otherArgs.length!=2) { System.err.println("<in> <out>!!"); System.exit(2); } Job job=new Job(); job.setJarByClass(AverageScore.class); job.setMapperClass(MapAvg.class); job.setReducerClass(ReduceAvg.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(IntWritable.class); FileInputFormat.addInputPath(job, new Path(otherArgs[0])); FileOutputFormat.setOutputPath(job,new Path(otherArgs[1])); System.exit(job.waitForCompletion(true)?0:1); } }
file1:
zhangsan 33
lisi 44
wangwu 55
zhaoliu 66

file2:
张三    4
李四    1
王五    2
赵六    3

file3:
zhangsan 22
lisi 33
wangwu 44
zhaoliu 55

file4:
李四 2
张三 1
王五 3
赵六 4

结果如下(注意:reduce 中使用的是整数除法,平均分被向下取整,例如 lisi 的 (44+33)/2=38.5 输出为 38):

lisi    38
wangwu    49
zhangsan    27
zhaoliu    60
张三    2
李四    1
王五    2
赵六    3

难道不支持中文的排序?其实输出并非没有排序:Hadoop 对 Text 键按 UTF-8 字节序排序,这等价于按 Unicode 码点排序——张(U+5F20)< 李(U+674E)< 王(U+738B)< 赵(U+8D75),正是上面结果的顺序,只是它不等于拼音顺序。要按拼音排序,需要通过 job.setSortComparatorClass() 提供一个自定义的 RawComparator(例如基于 java.text.Collator 的中文排序器),必要时再配合自定义 Partitioner。以后解决...

posted @ 2014-02-20 22:25  很厉害的名字  阅读(1100)  评论(0编辑  收藏  举报