MapRecude

任务:分析通话记录,查处每个手机号码有哪些打过来的号码

13510921776 10086
13710148751 10086
13914248991 10086
13510921776 13710148751
13510921776 13710148751
13914248991 13710148751
13710148751 13510921776

要求输出结果:

10086    13510921776|13710148751|13914248991|
13510921776    13710148751|
13710148751    13510921776|13510921776|13914248991|

代码:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import java.io.IOException;


public class PhoneAnalyzer extends Configured implements Tool {

    enum Counter {
        LINESKIP; // 出错的行
    }

    @Override
    public int run(String[] args) throws Exception {
        Configuration conf = getConf();
        Job job = new Job(conf, "phoneAnalyzer"); // 任务名
        job.setJarByClass(PhoneAnalyzer.class); // 指定Class
        FileInputFormat.addInputPath(job, new Path("hdfs://localhost:9000/user/root/in")); // 输入路径
        FileOutputFormat.setOutputPath(job, new Path("hdfs://localhost:9000/user/root/out")); // 输出路径
        job.setMapperClass(Map.class); // 调用Map类作为Mapper任务代码
        job.setReducerClass(Reduce.class); // 调用Reduce类作为Reducer任务代码
        job.setOutputFormatClass(TextOutputFormat.class);
        job.setOutputKeyClass(Text.class); // 指定输出的Key的格式(KEYOUT)
        job.setOutputValueClass(Text.class); // 指定输出的Value的格式(VALUEOUT)
        job.waitForCompletion(true);
        return job.isSuccessful() ? 0 : 1;
    }

    public static class Map extends
            Mapper<LongWritable, Text, Text, Text> {    //<KEYIN, VALUEIN, KEYOUT, VALUEOUT>
        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            try {
                // key - 行号 value - 一行的文本
                String line = value.toString();    //13510000000 10086(13510000000拨打10086)
                // 数据处理
                String[] lineSplit = line.split(" ");
                String phone1 = lineSplit[0];
                String phone2 = lineSplit[1];
                context.write(new Text(phone2), new Text(phone1));    // 输出 key \t value
            } catch (Exception e) {
                context.getCounter(Counter.LINESKIP).increment(1); // 出错令计数器+1
            }
        }

    }
    
    public static class Reduce extends Reducer<Text, Text, Text, Text> {    //<KEYIN(必须与Mapper的KEYOUT相同),VALUEIN(必须与Mapper的VALUEOUT相同),KEYOUT,VALUEOUT>
        
        @Override
        protected void reduce(Text key, Iterable<Text> values,
                Context context)
                throws IOException, InterruptedException {
            String valueStr;
            String out = "";
            for(Text value:values){
                valueStr = value.toString() + "|";
                out += valueStr;
            }
            // 输出 key \t value(如果我们的输出结果不是key \t value格式,那么我们的key可定义为NullWritable,而value使用key与value的组合。)
            context.write(key, new Text(out));    
        }
    }
    
    public static void main(String[] args) throws Exception {
        //运行任务
        int res = ToolRunner.run(new Configuration(), new PhoneAnalyzer(), args);
        System.exit(res);
    }
}

 

posted @ 2013-10-18 19:35  Kyle_Java  阅读(776)  评论(0编辑  收藏  举报