import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.*;
import org.apache.hadoop.mapred.*;

import java.io.IOException;
import java.util.*;

public class WordCount {

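    // Note: this example uses the classic org.apache.hadoop.mapred API.
    // Current Hadoop releases also ship the newer org.apache.hadoop.mapreduce
    // API, but the classic API remains supported and works for this job.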
    /*
     * Counts the occurrences of each word in the input.
     * 1. The map method turns each input record into <key1, value1> pairs.
     * 2. The reduce method receives the mapper's output and sums the values
     *    sharing the same key1 to obtain each word's count.
     * 3. The result is written to HDFS.
     */
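    /*
     * Illustrative data flow (example input invented for clarity): for the
     * line "hello world hello", the mapper emits <hello,1>, <world,1>,
     * <hello,1>; after the shuffle the reducer sees <hello,[1,1]> and
     * <world,[1]> and writes <hello,2> and <world,1>.
     */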

    // main: configures and submits the job
    public static void main(String[] args) throws Exception {
        JobConf conf = new JobConf(WordCount.class);
        conf.setJobName("WordCount");
        conf.setOutputKeyClass(Text.class);
        conf.setOutputValueClass(IntWritable.class);

        conf.setMapperClass(Map.class);
        conf.setReducerClass(Reduce.class);
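        // Optional tweak (not in the original): word-count summation is
        // associative and commutative, so the reducer could also run as a
        // combiner to shrink the data shuffled between map and reduce:
        // conf.setCombinerClass(Reduce.class);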

        conf.setInputFormat(TextInputFormat.class);
        conf.setOutputFormat(TextOutputFormat.class);

        FileInputFormat.setInputPaths(conf, new Path(args[0]));
        FileOutputFormat.setOutputPath(conf, new Path(args[1]));

        // runJob submits the job and blocks until it completes
        JobClient.runJob(conf);
    }
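    /*
     * A typical invocation, assuming the class is packaged into a jar named
     * wordcount.jar (the jar name and HDFS paths here are illustrative):
     *
     *   hadoop jar wordcount.jar WordCount /user/hadoop/input /user/hadoop/output
     */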

    // map: tokenizes each line and emits <word, 1> for every token
    public static class Map extends MapReduceBase
            implements Mapper<LongWritable, Text, Text, IntWritable> {
        // Reused across calls to avoid allocating a new Writable per token
        private final static IntWritable one = new IntWritable(1);
        private Text word = new Text();

        public void map(LongWritable key, Text value,
                OutputCollector<Text, IntWritable> output, Reporter reporter)
                throws IOException {
            String line = value.toString();
            StringTokenizer tokenizer = new StringTokenizer(line);
            while (tokenizer.hasMoreTokens()) {
                word.set(tokenizer.nextToken());
                output.collect(word, one);
            }
        }
    }
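    /*
     * Note: StringTokenizer splits on whitespace only, so "word," and "word"
     * count as different words. A sketch of a stricter map body (an
     * assumption, not part of the original) could normalize case and strip
     * punctuation:
     *
     *   for (String token : line.toLowerCase().split("\\W+")) {
     *       if (!token.isEmpty()) {
     *           word.set(token);
     *           output.collect(word, one);
     *       }
     *   }
     */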

    // reduce: sums all the 1s emitted for a given word
    public static class Reduce extends MapReduceBase
            implements Reducer<Text, IntWritable, Text, IntWritable> {
        public void reduce(Text key, Iterator<IntWritable> values,
                OutputCollector<Text, IntWritable> output, Reporter reporter)
                throws IOException {
            int sum = 0;
            while (values.hasNext()) {
                sum += values.next().get();
            }
            output.collect(key, new IntWritable(sum));
        }
    }
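    /*
     * With TextOutputFormat, the results appear in the output directory as
     * part-xxxxx files, one tab-separated "word<TAB>count" pair per line.
     */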
}