MapReduce的代码编写----统计学生的总分示例

score.txt
1500100001,1000001,98
1500100001,1000002,5
1500100001,1000003,137
1500100001,1000004,29
1500100001,1000005,85
1500100001,1000006,52
1500100002,1000001,139
1500100002,1000002,102
1500100002,1000003,44
1500100002,1000004,18
1500100002,1000005,46
1500100002,1000006,91
1500100003,1000001,48
...（示例数据，共6000行，格式为：学生ID,科目ID,分数）
程序代码如下：
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import java.io.IOException;

public class Demo3SumScore {
    //Map端
    public static class MyMapper extends Mapper<LongWritable, Text,LongWritable, IntWritable>{
        @Override
        protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
            //按照逗号切分数据(.var)
            String[] splits = value.toString().split(",");
            //将数据中的Id提取出来(.var)
            String id = splits[0];
            String score = splits[2];
            //以id作为key,分数score作为value,进行发送
            //id和score都需要转型,不能直接作为参数
            context.write(new LongWritable(Long.parseLong(id)),new IntWritable(Integer.parseInt(score)));
        }
    }

    //Reduce端
    public static class MyReducer extends Reducer<LongWritable,IntWritable,LongWritable,IntWritable>{
        @Override
        protected void reduce(LongWritable key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
            int sumScore = 0;
            //遍历迭代器,统计每个学生的总分
            for (IntWritable value : values) {
                sumScore = sumScore + value.get();
            }
            //sumScore不是对应的IntWritable类型,需要new一下
            context.write(key,new IntWritable(sumScore));
        }
    }

    //Driver端
    public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
        //创建配置对象
        Configuration conf = new Configuration();

        //MapReduce在运行的时候我们把它称为Job,创建一个Job实例
        Job job = Job.getInstance();
        //对Job进行一些简单的配置,参数名字为类名
        job.setJobName("Demo3SumScore");
        //通过class类设置运行Job时该执行哪一个类
        job.setJarByClass(Demo3SumScore.class);

        //配置Map任务
        //配置Map任务该运行哪一个类(前面的Map端的类)
        job.setMapperClass(MyMapper.class);
        //对Map端输出的Key、value的类型进行配置
        job.setMapOutputKeyClass(LongWritable.class);
        job.setMapOutputValueClass(IntWritable.class);

        //配置Reduce任务
        //配置Reduce任务运行哪一个类(前面的Reduce端的类)
        job.setReducerClass(MyReducer.class);
        //对Reduce端输出的Key、value的类型进行配置
        job.setOutputKeyClass(LongWritable.class);
        job.setOutputValueClass(IntWritable.class);

        // 配置输入输出路径
        FileInputFormat.addInputPath(job,new Path("/student/score/input"));
        //输出路径不需要提前创建,如果该目录已存在则会报错,加个if语句判断
        FileSystem fs = FileSystem.get(conf);
        if(fs.exists(new Path("/student/score/output"))){
            fs.delete(new Path("/student/score/output"),true);
        }
        FileOutputFormat.setOutputPath(job,new Path("/student/score/output"));

        // 等待job运行完成
        job.waitForCompletion(true);
    }

}
执行结果
1500100001	406
1500100002	440
1500100003	359
1500100004	421
1500100005	395
1500100006	314
1500100007	418
1500100008	363
1500100009	251
1500100010	402
...
posted @ 2022-02-16 15:07  阿伟宝座  阅读(365)  评论(0)    收藏  举报