package com.mengyao.hadoop.mapreduce;

import java.io.IOException;
import java.text.SimpleDateFormat;
import java.util.Date;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

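/**
 * A word-count style MapReduce job: the mapper splits each input line on tabs
 * and emits (word, 1), and the reducer sums the counts per word. The driver is
 * implemented as a {@link Tool} so that configuration and generic options flow
 * through {@link ToolRunner}.
 */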
public class MyGroupApp extends Configured implements Tool {

    static class MyGroupMapper extends Mapper<LongWritable, Text, Text, LongWritable> {

        private Text k = null;
        private LongWritable v = null;

        @Override
        protected void setup(Context context)
                throws IOException, InterruptedException {
            // Reuse a single key/value pair across map() calls to avoid object churn.
            k = new Text();
            v = new LongWritable(1L);
        }

        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            // Input lines are tab-separated; emit (word, 1) for each token.
            final String[] words = value.toString().split("\t");
            for (String word : words) {
                k.set(word);
                context.write(k, v);
            }
        }
    }

    static class MyGroupReducer extends Reducer<Text, LongWritable, Text, LongWritable> {
        @Override
        protected void reduce(Text key, Iterable<LongWritable> values, Context context)
                throws IOException, InterruptedException {
            // Sum the 1s emitted by the mappers for this word.
            long count = 0L;
            for (LongWritable item : values) {
                count += item.get();
            }
            context.write(key, new LongWritable(count));
        }
    }

    @Override
    public int run(String[] args) throws Exception {
        Configuration conf = getConf();
        // Reuse task JVMs without limit (-1 = no limit).
        conf.set("mapreduce.job.jvm.numtasks", "-1");
        // Disable speculative execution for map and reduce tasks.
        conf.set("mapreduce.map.speculative", "false");
        conf.set("mapreduce.reduce.speculative", "false");
        // Allow up to 4 attempts per failed task (the framework default).
        conf.set("mapreduce.map.maxattempts", "4");
        conf.set("mapreduce.reduce.maxattempts", "4");
        // Allow no extra records to be skipped around a bad record.
        conf.set("mapreduce.map.skip.maxrecords", "0");
        Job job = Job.getInstance(conf, MyGroupApp.class.getSimpleName());
        job.setJarByClass(MyGroupApp.class);
        job.setInputFormatClass(TextInputFormat.class);

        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        job.setMapperClass(MyGroupMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(LongWritable.class);

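        // Optional: the reduce function is commutative and associative, so it
        // could also be registered as a combiner to shrink the shuffle:
        // job.setCombinerClass(MyGroupReducer.class);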
        job.setReducerClass(MyGroupReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(LongWritable.class);

        return job.waitForCompletion(true) ? 0 : 1;
    }
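    /**
     * Submits this job through {@link ToolRunner}, which parses generic Hadoop
     * options (e.g. -D key=value) before delegating to {@link #run(String[])}.
     *
     * @param args the input and output paths
     * @return 0 if the job succeeded, non-zero otherwise
     */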
    public static int createJob(String[] args) {
        Configuration conf = new Configuration();
        int status = 1;
        try {
            status = ToolRunner.run(conf, new MyGroupApp(), args);
        } catch (Exception e) {
            // Rethrow wrapped; printing the stack trace here would only duplicate the report.
            throw new RuntimeException(e);
        }
        return status;
    }

    public static void main(String[] args) throws Exception {
        // When the job is compiled, packaged, and launched directly with ant,
        // no arguments are passed, so default input/output paths are assigned here.
        if (args.length == 0) {
            args = new String[]{"/testdata/words",
                    "/job/mapreduce/" + MyGroupApp.class.getSimpleName() + "_"
                            + new SimpleDateFormat("yyyyMMddHHmmss").format(new Date())};
        }
        if (args.length != 2) {
            System.out.println("Usage: " + MyGroupApp.class.getSimpleName() + " <in> <out>");
            System.exit(2);
        } else {
            int status = createJob(args);
            System.exit(status);
        }
    }

}