import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.*;
import org.apache.hadoop.mapred.*;

import java.io.IOException;
import java.util.*;

public class WordCount {

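    // Note: this example uses the classic org.apache.hadoop.mapred API.
    // Current Hadoop releases also ship the newer org.apache.hadoop.mapreduce
    // API, but the classic API remains supported and works for this job.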
    /*
     * Counts the occurrences of each word in the input.
     * 1. The map method turns each input record into <key1, value1> pairs.
     * 2. The reduce method receives the mapper's output and sums the values
     *    sharing the same key1 to obtain each word's count.
     * 3. The result is written to HDFS.
     */
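    /*
     * Illustrative data flow (example input invented for clarity): for the
     * line "hello world hello", the mapper emits <hello,1>, <world,1>,
     * <hello,1>; after the shuffle the reducer sees <hello,[1,1]> and
     * <world,[1]> and writes <hello,2> and <world,1>.
     */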

    // main: configures and submits the job
    public static void main(String[] args) throws Exception {
        JobConf conf = new JobConf(WordCount.class);
        conf.setJobName("WordCount");
        conf.setOutputKeyClass(Text.class);
        conf.setOutputValueClass(IntWritable.class);

        conf.setMapperClass(Map.class);
        conf.setReducerClass(Reduce.class);
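        // Optional tweak (not in the original): word-count summation is
        // associative and commutative, so the reducer could also run as a
        // combiner to shrink the data shuffled between map and reduce:
        // conf.setCombinerClass(Reduce.class);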

        conf.setInputFormat(TextInputFormat.class);
        conf.setOutputFormat(TextOutputFormat.class);

        FileInputFormat.setInputPaths(conf, new Path(args[0]));
        FileOutputFormat.setOutputPath(conf, new Path(args[1]));

        // runJob submits the job and blocks until it completes
        JobClient.runJob(conf);
    }
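    /*
     * A typical invocation, assuming the class is packaged into a jar named
     * wordcount.jar (the jar name and HDFS paths here are illustrative):
     *
     *   hadoop jar wordcount.jar WordCount /user/hadoop/input /user/hadoop/output
     */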

    // map: tokenizes each line and emits <word, 1> for every token
    public static class Map extends MapReduceBase
            implements Mapper<LongWritable, Text, Text, IntWritable> {
        // Reused across calls to avoid allocating a new Writable per token
        private final static IntWritable one = new IntWritable(1);
        private Text word = new Text();

        public void map(LongWritable key, Text value,
                OutputCollector<Text, IntWritable> output, Reporter reporter)
                throws IOException {
            String line = value.toString();
            StringTokenizer tokenizer = new StringTokenizer(line);
            while (tokenizer.hasMoreTokens()) {
                word.set(tokenizer.nextToken());
                output.collect(word, one);
            }
        }
    }
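    /*
     * Note: StringTokenizer splits on whitespace only, so "word," and "word"
     * count as different words. A sketch of a stricter map body (an
     * assumption, not part of the original) could normalize case and strip
     * punctuation:
     *
     *   for (String token : line.toLowerCase().split("\\W+")) {
     *       if (!token.isEmpty()) {
     *           word.set(token);
     *           output.collect(word, one);
     *       }
     *   }
     */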

    // reduce: sums all the 1s emitted for a given word
    public static class Reduce extends MapReduceBase
            implements Reducer<Text, IntWritable, Text, IntWritable> {
        public void reduce(Text key, Iterator<IntWritable> values,
                OutputCollector<Text, IntWritable> output, Reporter reporter)
                throws IOException {
            int sum = 0;
            while (values.hasNext()) {
                sum += values.next().get();
            }
            output.collect(key, new IntWritable(sum));
        }
    }
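    /*
     * With TextOutputFormat, the results appear in the output directory as
     * part-xxxxx files, one tab-separated "word<TAB>count" pair per line.
     */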
}