MapReduce 代码编写——统计每个学生总分的示例
输入数据 score.txt（每行为以逗号分隔的三个字段，第一个字段是学生 id，第三个字段是分数）：
1500100001,1000001,98
1500100001,1000002,5
1500100001,1000003,137
1500100001,1000004,29
1500100001,1000005,85
1500100001,1000006,52
1500100002,1000001,139
1500100002,1000002,102
1500100002,1000003,44
1500100002,1000004,18
1500100002,1000005,46
1500100002,1000006,91
1500100003,1000001,48
...共6000行
程序代码
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import java.io.IOException;
/**
 * MapReduce job that computes the total score of every student.
 *
 * <p>Each input line is expected to hold at least three comma-separated fields; the
 * first field is parsed as the student id ({@code long}) and the third field as a
 * score ({@code int}). The job emits one {@code (id, totalScore)} pair per student.
 *
 * <p>Input is read from {@value #INPUT_DIR}; output is written to {@value #OUTPUT_DIR},
 * which is deleted first if it already exists.
 */
public class Demo3SumScore {

    /** Directory the job reads its input from. */
    private static final String INPUT_DIR = "/student/score/input";

    /** Directory the job writes its result to (removed before the job starts). */
    private static final String OUTPUT_DIR = "/student/score/output";

    /** Counter group / name used to report input lines that could not be parsed. */
    private static final String COUNTER_GROUP = "Demo3SumScore";
    private static final String MALFORMED_COUNTER = "MALFORMED_LINES";

    /**
     * Map side: turns one input line into an {@code (id, score)} pair.
     */
    public static class MyMapper extends Mapper<LongWritable, Text, LongWritable, IntWritable> {

        // Reused for every record so that no new Writable is allocated per input line.
        private final LongWritable outKey = new LongWritable();
        private final IntWritable outValue = new IntWritable();

        /**
         * Splits the line on commas and emits the id (field 0) as key and the
         * score (field 2) as value.
         *
         * <p>Lines with fewer than three fields, or whose id/score is not numeric,
         * are skipped and counted in the {@code MALFORMED_LINES} counter instead of
         * failing the task.
         *
         * @param key     input key supplied by the input format (not used)
         * @param value   one line of the input file
         * @param context task context used to emit the pair and to update counters
         * @throws IOException          if writing the output pair fails
         * @throws InterruptedException if the task is interrupted while writing
         */
        @Override
        protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
            // Split the record on commas.
            String[] splits = value.toString().split(",");
            if (splits.length < 3) {
                context.getCounter(COUNTER_GROUP, MALFORMED_COUNTER).increment(1);
                return;
            }
            try {
                // id and score are text; they have to be converted before being wrapped in Writables.
                outKey.set(Long.parseLong(splits[0].trim()));
                outValue.set(Integer.parseInt(splits[2].trim()));
            } catch (NumberFormatException ignored) {
                // Deliberately skipped: a single bad line should not abort the whole job.
                context.getCounter(COUNTER_GROUP, MALFORMED_COUNTER).increment(1);
                return;
            }
            // Emit id as key and score as value.
            context.write(outKey, outValue);
        }
    }

    /**
     * Reduce side: adds up all scores received for one student id.
     *
     * <p>Input and output types are identical and the operation is a plain sum, so
     * this class is also registered as the job's combiner.
     */
    public static class MyReducer extends Reducer<LongWritable, IntWritable, LongWritable, IntWritable> {

        // Reused output value, see MyMapper.
        private final IntWritable total = new IntWritable();

        /**
         * Sums the values of one key and emits {@code (key, sum)}.
         *
         * @param key     student id
         * @param values  scores (or partial sums) belonging to that id
         * @param context task context used to emit the result
         * @throws IOException          if writing the output pair fails
         * @throws InterruptedException if the task is interrupted while writing
         * @throws ArithmeticException  if the sum does not fit into an {@code int}
         */
        @Override
        protected void reduce(LongWritable key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
            int sumScore = 0;
            // Walk the iterator and accumulate the student's total.
            for (IntWritable value : values) {
                // addExact fails loudly instead of silently wrapping around on overflow.
                sumScore = Math.addExact(sumScore, value.get());
            }
            total.set(sumScore);
            context.write(key, total);
        }
    }

    /**
     * Driver: configures the job, clears the output directory, runs the job and
     * terminates the JVM with exit status 0 on success or 1 on failure.
     *
     * @param args command-line arguments (not used)
     * @throws IOException            if the file system or job submission fails
     * @throws InterruptedException   if waiting for the job is interrupted
     * @throws ClassNotFoundException if a class configured on the job cannot be found
     */
    public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
        // Create the configuration object.
        Configuration conf = new Configuration();
        // A running MapReduce program is called a Job. Build it from conf so that the
        // job and the file-system check below share the same configuration.
        Job job = Job.getInstance(conf);
        // Basic job settings; the job is named after the class.
        job.setJobName("Demo3SumScore");
        // Tells Hadoop which jar to ship by locating the one that contains this class.
        job.setJarByClass(Demo3SumScore.class);

        // Map task: class to run and the types of its output key/value.
        job.setMapperClass(MyMapper.class);
        job.setMapOutputKeyClass(LongWritable.class);
        job.setMapOutputValueClass(IntWritable.class);

        // Summation is associative and commutative and the reducer's input and output
        // types match, so it can pre-aggregate on the map side.
        job.setCombinerClass(MyReducer.class);

        // Reduce task: class to run and the types of its output key/value.
        job.setReducerClass(MyReducer.class);
        job.setOutputKeyClass(LongWritable.class);
        job.setOutputValueClass(IntWritable.class);

        // Input and output paths.
        Path inputPath = new Path(INPUT_DIR);
        Path outputPath = new Path(OUTPUT_DIR);
        FileInputFormat.addInputPath(job, inputPath);
        // The output directory must not be created in advance; the job reports an error
        // if it already exists, so remove it first.
        FileSystem fs = FileSystem.get(conf);
        if (fs.exists(outputPath)) {
            fs.delete(outputPath, true);
        }
        FileOutputFormat.setOutputPath(job, outputPath);

        // Wait for the job to finish and propagate success/failure as the exit status.
        boolean succeeded = job.waitForCompletion(true);
        System.exit(succeeded ? 0 : 1);
    }
}
执行结果（每行：学生 id 与该学生的总分）
1500100001 406
1500100002 440
1500100003 359
1500100004 421
1500100005 395
1500100006 314
1500100007 418
1500100008 363
1500100009 251
1500100010 402
...