Hadoop 中的 HelloWorld
设置classpath
设置classpath 实现WordCount import java.io.*; import org.apache.hadoop.*; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.IntWritable; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapreduce.*; import org.apache.hadoop.mapreduce.lib.jobcontrol.*; import org.apache.hadoop.mapreduce.lib.map.*; import org.apache.hadoop.mapreduce.lib.reduce.*; import org.apache.hadoop.mapreduce.lib.input.*; import org.apache.hadoop.mapreduce.lib.output.*; public class WordCount1 { public static class WordCountMapper extends Mapper<Object,Text,Text,IntWritable> { private final static IntWritable one = new IntWritable(1); private Text word = new Text(); public void map(Object key,Text value,Context context) throws IOException,InterruptedException { String[] words = value.toString().split(" "); for(String str: words) { word.set(str); context.write(word,one); } } } public static class WordCountReducer extends Reducer<Text,IntWritable,Text,IntWritable> { public void reduce(Text key,Iterable<IntWritable>values,Context context) throws IOException,InterruptedException { int total =0; for(IntWritable val:values) { total++; } context.write(key,new IntWritable(total)); } } public static void main(String[] args) throws Exception { Configuration conf = new Configuration(); //Job job = new Job(conf,"word count"); Job job = Job.getInstance(); job.setJobName("word count"); job.setJarByClass(WordCount1.class); job.setMapperClass(WordCountMapper.class); job.setReducerClass(WordCountReducer.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(IntWritable.class); FileInputFormat.addInputPath(job,new Path(args[0])); FileOutputFormat.setOutputPath(job,new Path(args[1])); System.exit(job.waitForCompletion(true)?0:1); } } import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.IntWritable; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapreduce.Job; import 
org.apache.hadoop.mapreduce.lib.input.*; import org.apache.hadoop.mapreduce.lib.output.*; import org.apache.hadoop.mapreduce.lib.map.*; import org.apache.hadoop.mapreduce.lib.reduce.*; public class WordCountPredefined { public static void main(String[] args) throws Exception { Configuration conf = new Configuration(); //Job job = new Job(conf,"word count1"); Job job = Job.getInstance(); job.setJobName("word count1"); job.setJarByClass(WordCountPredefined.class); job.setMapperClass(TokenCounterMapper.class); job.setReducerClass(IntSumReducer.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(IntWritable.class); FileInputFormat.addInputPath(job,new Path(args[0])); FileOutputFormat.setOutputPath(job,new Path(args[1])); System.exit(job.waitForCompletion(true)?0:1); } } WordCount的简易方法
实现WordCount
import java.io.*;import org.apache.hadoop.*;import org.apache.hadoop.conf.Configuration;import org.apache.hadoop.fs.Path;import org.apache.hadoop.io.IntWritable;import org.apache.hadoop.io.Text;import org.apache.hadoop.mapreduce.*;import org.apache.hadoop.mapreduce.lib.jobcontrol.*;import org.apache.hadoop.mapreduce.lib.map.*;import org.apache.hadoop.mapreduce.lib.reduce.*;import org.apache.hadoop.mapreduce.lib.input.*;import org.apache.hadoop.mapreduce.lib.output.*;public class WordCount1{public static class WordCountMapper extends Mapper<Object,Text,Text,IntWritable>{private final static IntWritable one = new IntWritable(1);private Text word = new Text();public void map(Object key,Text value,Context context) throws IOException,InterruptedException{String[] words = value.toString().split(" ");for(String str: words){word.set(str);context.write(word,one);}}}public static class WordCountReducer extends Reducer<Text,IntWritable,Text,IntWritable>{public void reduce(Text key,Iterable<IntWritable>values,Context context) throws IOException,InterruptedException{int total =0;for(IntWritable val:values){total++;}context.write(key,new IntWritable(total));}}public static void main(String[] args) throws Exception {Configuration conf = new Configuration();//Job job = new Job(conf,"word count");Job job = Job.getInstance();job.setJobName("word count");job.setJarByClass(WordCount1.class);job.setMapperClass(WordCountMapper.class);job.setReducerClass(WordCountReducer.class);job.setOutputKeyClass(Text.class);job.setOutputValueClass(IntWritable.class);FileInputFormat.addInputPath(job,new Path(args[0]));FileOutputFormat.setOutputPath(job,new Path(args[1]));System.exit(job.waitForCompletion(true)?0:1);}}
import org.apache.hadoop.conf.Configuration;import org.apache.hadoop.fs.Path;import org.apache.hadoop.io.IntWritable;import org.apache.hadoop.io.Text;import org.apache.hadoop.mapreduce.Job;import org.apache.hadoop.mapreduce.lib.input.*;import org.apache.hadoop.mapreduce.lib.output.*;import org.apache.hadoop.mapreduce.lib.map.*;import org.apache.hadoop.mapreduce.lib.reduce.*;public class WordCountPredefined{public static void main(String[] args) throws Exception{Configuration conf = new Configuration();//Job job = new Job(conf,"word count1");Job job = Job.getInstance();job.setJobName("word count1");job.setJarByClass(WordCountPredefined.class);job.setMapperClass(TokenCounterMapper.class);job.setReducerClass(IntSumReducer.class);job.setOutputKeyClass(Text.class);job.setOutputValueClass(IntWritable.class);FileInputFormat.addInputPath(job,new Path(args[0]));FileOutputFormat.setOutputPath(job,new Path(args[1]));System.exit(job.waitForCompletion(true)?0:1);}}
WordCount的简易方法

浙公网安备 33010602011771号