Hadoop 中的 HelloWorld

设置 classpath



实现WordCount
import java.io.*;
import org.apache.hadoop.*;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.*;
import org.apache.hadoop.mapreduce.lib.jobcontrol.*;
import org.apache.hadoop.mapreduce.lib.map.*;
import org.apache.hadoop.mapreduce.lib.reduce.*;
import org.apache.hadoop.mapreduce.lib.input.*;
import org.apache.hadoop.mapreduce.lib.output.*;
/**
 * Classic Hadoop MapReduce word count: reads text from args[0],
 * writes (word, count) pairs to args[1].
 */
public class WordCount1
{
    public static class WordCountMapper extends Mapper<Object,Text,Text,IntWritable>
    {
         // Reused across map() calls to avoid allocating a writable per record.
         private final static IntWritable one = new IntWritable(1);
         private Text word = new Text();

         /**
          * Emits (token, 1) for every whitespace-separated token in the input line.
          */
         public void map(Object key,Text value,Context context) throws IOException,InterruptedException
         {
             // BUG FIX: split(" ") yields empty "" tokens for consecutive or
             // leading spaces, which were then counted as words. Split on runs
             // of whitespace and skip any residual empty token instead.
             String[] words = value.toString().split("\\s+");
             for(String str: words)
             {
                     if (str.isEmpty()) continue; // leading whitespace produces one empty token
                     word.set(str);
                     context.write(word,one);
             }
         }
    }
    public static class WordCountReducer extends Reducer<Text,IntWritable,Text,IntWritable>
    {
         /**
          * Sums the partial counts for each word.
          */
         public void reduce(Text key,Iterable<IntWritable>values,Context context) throws IOException,InterruptedException
         {
              int total =0;
              for(IntWritable val:values)
              {
                  // BUG FIX: was total++, which counts elements rather than
                  // summing them. That only works when every value is exactly 1;
                  // it silently breaks if this class is ever set as a combiner.
                  total += val.get();
              }
              context.write(key,new IntWritable(total));
         }
    }
    /**
     * Configures and submits the job. args[0] = input path, args[1] = output path.
     */
    public static void main(String[] args) throws Exception {
          Configuration conf = new Configuration();
          // BUG FIX: conf was created but never handed to the Job
          // (Job.getInstance() with no arguments builds its own Configuration),
          // so any settings applied to conf were silently ignored. Pass it in,
          // along with the job name, as the commented-out legacy ctor intended.
          Job job = Job.getInstance(conf, "word count");
          job.setJarByClass(WordCount1.class);
          job.setMapperClass(WordCountMapper.class);
          job.setReducerClass(WordCountReducer.class);
          job.setOutputKeyClass(Text.class);
          job.setOutputValueClass(IntWritable.class);
          FileInputFormat.addInputPath(job,new Path(args[0]));
          FileOutputFormat.setOutputPath(job,new Path(args[1]));
          // Block until the job finishes; exit 0 on success, 1 on failure.
          System.exit(job.waitForCompletion(true)?0:1);
    }
}
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.*;
import org.apache.hadoop.mapreduce.lib.output.*;
import org.apache.hadoop.mapreduce.lib.map.*;
import org.apache.hadoop.mapreduce.lib.reduce.*;
/**
 * Word count using Hadoop's predefined TokenCounterMapper / IntSumReducer
 * instead of hand-written map and reduce classes.
 * args[0] = input path, args[1] = output path.
 */
public class WordCountPredefined
{
      public static void main(String[] args) throws Exception
      {
        Configuration conf = new Configuration();
        // BUG FIX: conf was created but never passed to the Job
        // (Job.getInstance() with no arguments builds its own Configuration),
        // so any settings on conf had no effect. Supply it together with the
        // job name, matching the commented-out legacy constructor's intent.
        Job job = Job.getInstance(conf, "word count1");
        job.setJarByClass(WordCountPredefined.class);
        // Library classes: TokenCounterMapper emits (token, 1) per token,
        // IntSumReducer sums the IntWritable values per key.
        job.setMapperClass(TokenCounterMapper.class);
        job.setReducerClass(IntSumReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        FileInputFormat.addInputPath(job,new Path(args[0]));
        FileOutputFormat.setOutputPath(job,new Path(args[1]));
        // Block until completion; exit 0 on success, 1 on failure.
        System.exit(job.waitForCompletion(true)?0:1);
      }
}

WordCount的简易方法

 


 
实现WordCount
  1. import java.io.*;
  2. import org.apache.hadoop.*;
  3. import org.apache.hadoop.conf.Configuration;
  4. import org.apache.hadoop.fs.Path;
  5. import org.apache.hadoop.io.IntWritable;
  6. import org.apache.hadoop.io.Text;
  7. import org.apache.hadoop.mapreduce.*;
  8. import org.apache.hadoop.mapreduce.lib.jobcontrol.*;
  9. import org.apache.hadoop.mapreduce.lib.map.*;
  10. import org.apache.hadoop.mapreduce.lib.reduce.*;
  11. import org.apache.hadoop.mapreduce.lib.input.*;
  12. import org.apache.hadoop.mapreduce.lib.output.*;
  13. public class WordCount1
  14. {
  15. public static class WordCountMapper extends Mapper<Object,Text,Text,IntWritable>
  16. {
  17. private final static IntWritable one = new IntWritable(1);
  18. private Text word = new Text();
  19. public void map(Object key,Text value,Context context) throws IOException,InterruptedException
  20. {
  21. String[] words = value.toString().split(" ");
  22. for(String str: words)
  23. {
  24. word.set(str);
  25. context.write(word,one);
  26. }
  27. }
  28. }
  29. public static class WordCountReducer extends Reducer<Text,IntWritable,Text,IntWritable>
  30. {
  31. public void reduce(Text key,Iterable<IntWritable>values,Context context) throws IOException,InterruptedException
  32. {
  33. int total =0;
  34. for(IntWritable val:values)
  35. {
  36. total++;
  37. }
  38. context.write(key,new IntWritable(total));
  39. }
  40. }
  41. public static void main(String[] args) throws Exception {
  42. Configuration conf = new Configuration();
  43. //Job job = new Job(conf,"word count");
  44. Job job = Job.getInstance();
  45. job.setJobName("word count");
  46. job.setJarByClass(WordCount1.class);
  47. job.setMapperClass(WordCountMapper.class);
  48. job.setReducerClass(WordCountReducer.class);
  49. job.setOutputKeyClass(Text.class);
  50. job.setOutputValueClass(IntWritable.class);
  51. FileInputFormat.addInputPath(job,new Path(args[0]));
  52. FileOutputFormat.setOutputPath(job,new Path(args[1]));
  53. System.exit(job.waitForCompletion(true)?0:1);
  54. }
  55. }
  1. import org.apache.hadoop.conf.Configuration;
  2. import org.apache.hadoop.fs.Path;
  3. import org.apache.hadoop.io.IntWritable;
  4. import org.apache.hadoop.io.Text;
  5. import org.apache.hadoop.mapreduce.Job;
  6. import org.apache.hadoop.mapreduce.lib.input.*;
  7. import org.apache.hadoop.mapreduce.lib.output.*;
  8. import org.apache.hadoop.mapreduce.lib.map.*;
  9. import org.apache.hadoop.mapreduce.lib.reduce.*;
  10. public class WordCountPredefined
  11. {
  12. public static void main(String[] args) throws Exception
  13. {
  14. Configuration conf = new Configuration();
  15. //Job job = new Job(conf,"word count1");
  16. Job job = Job.getInstance();
  17. job.setJobName("word count1");
  18. job.setJarByClass(WordCountPredefined.class);
  19. job.setMapperClass(TokenCounterMapper.class);
  20. job.setReducerClass(IntSumReducer.class);
  21. job.setOutputKeyClass(Text.class);
  22. job.setOutputValueClass(IntWritable.class);
  23. FileInputFormat.addInputPath(job,new Path(args[0]));
  24. FileOutputFormat.setOutputPath(job,new Path(args[1]));
  25. System.exit(job.waitForCompletion(true)?0:1);
  26. }
  27. }
WordCount的简易方法
posted @ 2017-11-15 17:11  ghevinn  阅读(189)  评论(0)    收藏  举报