package com.mengyao.hadoop.mapreduce;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

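/*
 * A minimal sketch of staging the sample input on HDFS before running this
 * job; the paths match the Javadoc below, adjust them for your cluster:
 *
 *   hdfs dfs -mkdir -p /mapreduces
 *   hdfs dfs -put word.txt /mapreduces/word.txt
 */
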
/**
 * A classic WordCount job. The input is the file /mapreduces/word.txt on
 * HDFS, with the following contents:
 * hadoop zookeeper hbase hive
 * flume sqoop pig mahout
 * hadoop spark mllib hive zookeeper
 * hadoop storm kafka redis zookeeper
 *
 * The output directory on HDFS is /mapreduces/wordcount/
 * The empty _SUCCESS file marks a successful run (no marker file is written
 * when the job fails).
 * The part-r-00000 file holds the job's result:
 * flume 1
 * hadoop 3
 * hbase 1
 * hive 2
 * kafka 1
 * mahout 1
 * mllib 1
 * pig 1
 * redis 1
 * spark 1
 * sqoop 1
 * storm 1
 * zookeeper 3
 *
 * @author mengyao
 */
public class WordCount extends Configured implements Tool {

    static class WordCountMapper extends Mapper<LongWritable, Text, Text, LongWritable> {

        private Text outputKey;
        private LongWritable outputValue;

        @Override
        protected void setup(Context context)
                throws IOException, InterruptedException {
            // Reuse one Text and one LongWritable per task rather than
            // allocating new objects for every record.
            this.outputKey = new Text();
            this.outputValue = new LongWritable(1L);
        }

        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            // Split on any run of whitespace: the sample input is
            // space-separated, so splitting on "\t" alone would emit
            // entire lines as single "words".
            final String[] words = value.toString().split("\\s+");
            for (String word : words) {
                if (word.isEmpty()) {
                    continue;
                }
                this.outputKey.set(word);
                context.write(this.outputKey, this.outputValue);
            }
        }
    }

    static class WordCountReducer extends Reducer<Text, LongWritable, Text, LongWritable> {

        private Text outputKey;
        private LongWritable outputValue;

        @Override
        protected void setup(Context context)
                throws IOException, InterruptedException {
            this.outputKey = new Text();
            this.outputValue = new LongWritable();
        }

        @Override
        protected void reduce(Text key, Iterable<LongWritable> values, Context context)
                throws IOException, InterruptedException {
            // Sum the partial counts emitted for this word by the mappers
            // (and, when run as a combiner, pre-aggregate them map-side).
            long count = 0L;
            for (LongWritable item : values) {
                count += item.get();
            }
            this.outputKey.set(key);
            this.outputValue.set(count);
            context.write(this.outputKey, this.outputValue);
        }
    }

    @Override
    public int run(String[] args) throws Exception {
        Job job = Job.getInstance(getConf(), WordCount.class.getSimpleName());
        job.setJarByClass(WordCount.class);

        job.setInputFormatClass(TextInputFormat.class);
        FileInputFormat.addInputPath(job, new Path(args[0]));
        // The output directory must not exist yet, or submission will fail.
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        job.setMapperClass(WordCountMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(LongWritable.class);

        // The reducer doubles as a combiner: summation is associative and
        // commutative, and the combiner's input and output types match.
        job.setCombinerClass(WordCountReducer.class);

        job.setReducerClass(WordCountReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(LongWritable.class);

        return job.waitForCompletion(true) ? 0 : 1;
    }

    public static int createJob(String[] args) {
        Configuration conf = new Configuration();
        conf.set("dfs.datanode.socket.write.timeout", "7200000");
        // Hint the framework toward input splits of 256 MB to 512 MB.
        conf.set("mapreduce.input.fileinputformat.split.minsize", "268435456");
        conf.set("mapreduce.input.fileinputformat.split.maxsize", "536870912");
        int status = 0;

        try {
            status = ToolRunner.run(conf, new WordCount(), args);
        } catch (Exception e) {
            e.printStackTrace();
            // Report failure instead of falling through with status 0.
            status = 1;
        }

        return status;
    }

    public static void main(String[] args) {
        // For an in-IDE test you can hard-code the paths from the Javadoc:
        // args = new String[]{"/mapreduces/word.txt", "/mapreduces/wordcount"};
        if (args.length != 2) {
            System.err.println("Usage: " + WordCount.class.getName()
                    + " <INPUT_PATH> <OUTPUT_PATH>");
            System.exit(2);
        }
        int status = createJob(args);
        System.exit(status);
    }

}
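
/*
 * A minimal sketch of submitting the job, assuming the compiled classes are
 * packaged into a jar named wordcount.jar (the jar name is hypothetical):
 *
 *   hadoop jar wordcount.jar com.mengyao.hadoop.mapreduce.WordCount \
 *       /mapreduces/word.txt /mapreduces/wordcount
 *
 * Once the job succeeds, the counts can be inspected with:
 *
 *   hdfs dfs -cat /mapreduces/wordcount/part-r-00000
 */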