Hadoop 中的 HelloWorld
设置classpath
设置classpath 实现WordCount import java.io.*; import org.apache.hadoop.*; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.IntWritable; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapreduce.*; import org.apache.hadoop.mapreduce.lib.jobcontrol.*; import org.apache.hadoop.mapreduce.lib.map.*; import org.apache.hadoop.mapreduce.lib.reduce.*; import org.apache.hadoop.mapreduce.lib.input.*; import org.apache.hadoop.mapreduce.lib.output.*; public class WordCount1 { public static class WordCountMapper extends Mapper<Object,Text,Text,IntWritable> { private final static IntWritable one = new IntWritable(1); private Text word = new Text(); public void map(Object key,Text value,Context context) throws IOException,InterruptedException { String[] words = value.toString().split(" "); for(String str: words) { word.set(str); context.write(word,one); } } } public static class WordCountReducer extends Reducer<Text,IntWritable,Text,IntWritable> { public void reduce(Text key,Iterable<IntWritable>values,Context context) throws IOException,InterruptedException { int total =0; for(IntWritable val:values) { total++; } context.write(key,new IntWritable(total)); } } public static void main(String[] args) throws Exception { Configuration conf = new Configuration(); //Job job = new Job(conf,"word count"); Job job = Job.getInstance(); job.setJobName("word count"); job.setJarByClass(WordCount1.class); job.setMapperClass(WordCountMapper.class); job.setReducerClass(WordCountReducer.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(IntWritable.class); FileInputFormat.addInputPath(job,new Path(args[0])); FileOutputFormat.setOutputPath(job,new Path(args[1])); System.exit(job.waitForCompletion(true)?0:1); } } import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.IntWritable; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapreduce.Job; import 
org.apache.hadoop.mapreduce.lib.input.*; import org.apache.hadoop.mapreduce.lib.output.*; import org.apache.hadoop.mapreduce.lib.map.*; import org.apache.hadoop.mapreduce.lib.reduce.*; public class WordCountPredefined { public static void main(String[] args) throws Exception { Configuration conf = new Configuration(); //Job job = new Job(conf,"word count1"); Job job = Job.getInstance(); job.setJobName("word count1"); job.setJarByClass(WordCountPredefined.class); job.setMapperClass(TokenCounterMapper.class); job.setReducerClass(IntSumReducer.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(IntWritable.class); FileInputFormat.addInputPath(job,new Path(args[0])); FileOutputFormat.setOutputPath(job,new Path(args[1])); System.exit(job.waitForCompletion(true)?0:1); } } WordCount的简易方法
实现WordCount
import java.io.*;import org.apache.hadoop.*;import org.apache.hadoop.conf.Configuration;import org.apache.hadoop.fs.Path;import org.apache.hadoop.io.IntWritable;import org.apache.hadoop.io.Text;import org.apache.hadoop.mapreduce.*;import org.apache.hadoop.mapreduce.lib.jobcontrol.*;import org.apache.hadoop.mapreduce.lib.map.*;import org.apache.hadoop.mapreduce.lib.reduce.*;import org.apache.hadoop.mapreduce.lib.input.*;import org.apache.hadoop.mapreduce.lib.output.*;public class WordCount1{public static class WordCountMapper extends Mapper<Object,Text,Text,IntWritable>{private final static IntWritable one = new IntWritable(1);private Text word = new Text();public void map(Object key,Text value,Context context) throws IOException,InterruptedException{String[] words = value.toString().split(" ");for(String str: words){word.set(str);context.write(word,one);}}}public static class WordCountReducer extends Reducer<Text,IntWritable,Text,IntWritable>{public void reduce(Text key,Iterable<IntWritable>values,Context context) throws IOException,InterruptedException{int total =0;for(IntWritable val:values){total++;}context.write(key,new IntWritable(total));}}public static void main(String[] args) throws Exception {Configuration conf = new Configuration();//Job job = new Job(conf,"word count");Job job = Job.getInstance();job.setJobName("word count");job.setJarByClass(WordCount1.class);job.setMapperClass(WordCountMapper.class);job.setReducerClass(WordCountReducer.class);job.setOutputKeyClass(Text.class);job.setOutputValueClass(IntWritable.class);FileInputFormat.addInputPath(job,new Path(args[0]));FileOutputFormat.setOutputPath(job,new Path(args[1]));System.exit(job.waitForCompletion(true)?0:1);}}
import org.apache.hadoop.conf.Configuration;import org.apache.hadoop.fs.Path;import org.apache.hadoop.io.IntWritable;import org.apache.hadoop.io.Text;import org.apache.hadoop.mapreduce.Job;import org.apache.hadoop.mapreduce.lib.input.*;import org.apache.hadoop.mapreduce.lib.output.*;import org.apache.hadoop.mapreduce.lib.map.*;import org.apache.hadoop.mapreduce.lib.reduce.*;public class WordCountPredefined{public static void main(String[] args) throws Exception{Configuration conf = new Configuration();//Job job = new Job(conf,"word count1");Job job = Job.getInstance();job.setJobName("word count1");job.setJarByClass(WordCountPredefined.class);job.setMapperClass(TokenCounterMapper.class);job.setReducerClass(IntSumReducer.class);job.setOutputKeyClass(Text.class);job.setOutputValueClass(IntWritable.class);FileInputFormat.addInputPath(job,new Path(args[0]));FileOutputFormat.setOutputPath(job,new Path(args[1]));System.exit(job.waitForCompletion(true)?0:1);}}
WordCount的简易方法

浙公网安备 33010602011771号