1 package com.euphe.filter;
2
3 import com.euphe.util.HUtils;
4 import com.euphe.util.Utils;
5 import org.apache.hadoop.conf.Configuration;
6 import org.apache.hadoop.conf.Configured;
7 import org.apache.hadoop.fs.FileSystem;
8 import org.apache.hadoop.fs.Path;
9 import org.apache.hadoop.io.Text;
10 import org.apache.hadoop.mapreduce.Job;
11 import org.apache.hadoop.mapreduce.Mapper;
12 import org.apache.hadoop.mapreduce.Reducer;
13 import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
14 import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
15 import org.apache.hadoop.util.GenericOptionsParser;
16 import org.apache.hadoop.util.Tool;
17
18 import java.io.IOException;
19
20 public class ReductionJob extends Configured implements Tool {
21 public static class Map extends Mapper<Object, Text, Text, Text> {
22 private static Text text = new Text();
23
24 public void map(Object key, Text value, Context context) throws IOException, InterruptedException {
25 text = value;
26 context.write(text, new Text());
27 }
28 }
29
30 public static class Reduce extends Reducer<Text, Text, Text, Text> {
31 public void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
32 context.write(key, new Text());
33 }
34 }
35 @Override
36 public int run(String[] args) throws Exception {
37 Configuration conf = HUtils.getConf();
38 conf.set("mapreduce.job.jar", Utils.getRootPathBasedPath("WEB-INF/jars/redu.jar"));
39 String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();//解析命令行参数
40 if (otherArgs.length !=2) {//要求必须有输入和输出路径两个参数
41 System.err.println("Usage: com.euphe.filter.ReductionJob <in> <out>");
42 System.exit(2);
43 }
44 Job job = Job.getInstance(conf,"Reduction input :"+otherArgs[0]+" to "+otherArgs[1]);
45 job.setJarByClass(ReductionJob.class);
46 job.setMapperClass(Map.class);
47 job.setReducerClass(Reduce.class);
48 job.setNumReduceTasks(1);
49
50 job.setOutputKeyClass(Text.class);
51 job.setOutputValueClass(Text.class);
52
53 FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
54 FileOutputFormat.setOutputPath(job,new Path(otherArgs[1]));
55 FileSystem.get(conf).delete(new Path(otherArgs[1]), true);//调用任务前先删除输出目录
56 return job.waitForCompletion(true) ? 0 : 1;
57 }
58 }