public static class WordCount_Mapper extends Mapper<LongWritable, Text, Text, IntWritable> {
    @Override
    protected void map(LongWritable key, Text value, Mapper<LongWritable, Text, Text, IntWritable>.Context context)
            throws IOException, InterruptedException {
        // Debug output: print each input record as <byte offset, line>
        System.out.println("split:<" + key + "," + value + ">");
        String[] strs = value.toString().split(" ");          // split the line into words on spaces
        for (String string : strs) {
            System.out.println("map:<" + key + "," + value + ">");
            context.write(new Text(string), new IntWritable(1)); // emit <word, 1>
        }
    }
}
public static class WordCount_Reducer extends Reducer<Text, IntWritable, Text, IntWritable> {
    @Override
    protected void reduce(Text key, Iterable<IntWritable> values,
            Reducer<Text, IntWritable, Text, IntWritable>.Context context) throws IOException, InterruptedException {
        int sum = 0;                                           // running total for this word
        for (IntWritable intWritable : values) {
            System.out.println("reduce:<" + key + "," + intWritable + ">");
            sum += intWritable.get();                          // accumulate the counts
        }
        context.write(key, new IntWritable(sum));              // emit <word, total count>
    }
}
public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
    Job job = Job.getInstance();
    job.setJarByClass(WordCount.class);                        // the enclosing driver class
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    job.setMapperClass(WordCount_Mapper.class);
    job.setReducerClass(WordCount_Reducer.class);
    // Input and output paths on HDFS (adjust the NameNode address to your cluster)
    FileInputFormat.addInputPath(job, new Path("hdfs://192.168.100.123:8020/input"));
    FileOutputFormat.setOutputPath(job, new Path("hdfs://192.168.100.123:8020/output"));
    job.waitForCompletion(true);
}
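To see what the debug statements above produce, assume for illustration that an input split contains the single line "hello world" at byte offset 0. The map method is called once for that line, prints the split record, then prints one map line per word, and emits <hello,1> and <world,1>:

split:<0,hello world>
map:<0,hello world>
map:<0,hello world>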
package hadoop.mapreduce;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
public class MyWordCount {
/*
 * KEYIN:    the key type input to the map phase (the byte offset of the line)
 * VALUEIN:  the value type input to the map phase (one line of the text file)
 * KEYOUT:   the key type output by the map phase (a word)
 * VALUEOUT: the value type output by the map phase (the word's count -- 1)
 *
 * Java primitive types:
 * int, short, long, double, float, char, boolean, byte
 * Hadoop writable types:
 * IntWritable, ShortWritable, LongWritable, DoubleWritable, FloatWritable,
 * ByteWritable, BooleanWritable, NullWritable, Text
 * Text: a UTF-8 encoded text type
 * (a small conversion sketch follows the Mapper class below)
 */
public static class WordCount_Mapper extends Mapper<LongWritable, Text, Text, IntWritable>{
    @Override // override Mapper's map method
    protected void map(LongWritable key, Text value, Mapper<LongWritable, Text,
            Text, IntWritable>.Context context)
            throws IOException, InterruptedException {
        String[] line = value.toString().split(" ");  // split the received line into individual words on spaces
        for (String word : line) {                    // iterate over the array of words
            context.write(new Text(word), new IntWritable(1)); // count the word: write the intermediate pair <word, 1> to the context
        }
    }
}
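The comment block above lists Hadoop's Writable wrapper types next to the Java primitives they correspond to. As a minimal standalone sketch (not part of the WordCount program; the class name WritableDemo is just for illustration), converting between the two looks like this:

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;

public class WritableDemo {                          // hypothetical demo class, illustration only
    public static void main(String[] args) {
        IntWritable count = new IntWritable(1);      // wrap a Java int
        int n = count.get();                         // unwrap back to int
        Text word = new Text("hadoop");              // wrap a Java String (stored as UTF-8)
        String s = word.toString();                  // unwrap back to String
        LongWritable offset = new LongWritable(0L);  // wrap a Java long, e.g. a line offset
        System.out.println(word + " -> " + n + ", offset " + offset.get() + ", " + s);
    }
}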
/*
 * KEYIN:    the key type input to the reduce phase (a word)
 * VALUEIN:  the value type input to the reduce phase (the word's counts)
 * KEYOUT:   the key type output by the reduce phase (a word)
 * VALUEOUT: the value type output by the reduce phase (the sum of the word's counts)
 *
 * In the generated reduce method, rename the parameters as follows:
 * change Text arg0 to Text key
 * change Iterable<IntWritable> arg1 to Iterable<IntWritable> values
 * change Context arg2 to Context context
 */
public static class WordCount_Reducer extends Reducer<Text, IntWritable, Text, IntWritable>{
    @Override
    protected void reduce(Text key, Iterable<IntWritable> values,
            Reducer<Text, IntWritable, Text, IntWritable>.Context context)
            throws IOException, InterruptedException {
        int sum = 0;                              // running total for this word
        for (IntWritable intWritable : values) {  // iterate over the counts that share the same key (word)
            sum += intWritable.get();             // accumulate the counts for this word
        }
        context.write(key, new IntWritable(sum)); // write the result <word, total> to the context
    }
}
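For example, if the input contains the word "hello" three times, the shuffle phase delivers key hello with values [1, 1, 1] to this reducer, which sums them and emits (hello, 3).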
// Submit the job
public static void main(String[] args) throws Exception {
    String inPath = "hdfs://192.168.182.10:8020/input.txt";
    String outPath = "hdfs://192.168.182.10:8020/output/";
    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf);              // create the Job object from the configuration
    FileSystem fs = FileSystem.get(conf);
    if (fs.exists(new Path(outPath))) {           // MapReduce fails if the output path already exists,
        fs.delete(new Path(outPath), true);       // so delete it first
    }
    job.setJarByClass(MyWordCount.class);         // set the driver class MyWordCount
    job.setMapperClass(WordCount_Mapper.class);   // set the Mapper class
    job.setReducerClass(WordCount_Reducer.class); // set the Reducer class
    job.setOutputKeyClass(Text.class);            // set the output key type
    job.setOutputValueClass(IntWritable.class);   // set the output value type
    // Set the input path (adjust to your own IP and HDFS address)
    FileInputFormat.addInputPath(job, new Path(inPath));
    // Set the output path for the results (adjust to your own IP and HDFS address)
    FileOutputFormat.setOutputPath(job, new Path(outPath));
    System.exit(job.waitForCompletion(true) ? 0 : 1); // submit the job and wait for it to complete
}
}
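Once compiled and packaged into a jar, the job can be submitted from the command line roughly as follows (the jar name wordcount.jar is an example, and /input.txt is assumed to already exist on HDFS):

hadoop jar wordcount.jar hadoop.mapreduce.MyWordCount
hdfs dfs -cat /output/part-r-00000

The reducer's output lands in files named like part-r-00000 under the output directory, one "word<TAB>count" pair per line.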