WordCount Hands-On Example
1. Environment setup:
Add the following dependencies to the pom.xml file:
<dependencies>
    <dependency>
        <groupId>junit</groupId>
        <artifactId>junit</artifactId>
        <version>4.12</version>
    </dependency>
    <dependency>
        <groupId>org.apache.logging.log4j</groupId>
        <artifactId>log4j-slf4j-impl</artifactId>
        <version>2.12.0</version>
    </dependency>
    <dependency>
        <groupId>org.apache.hadoop</groupId>
        <artifactId>hadoop-client</artifactId>
        <version>3.1.3</version>
    </dependency>
</dependencies>
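Note: the hadoop-client version (3.1.3 here) should normally match the Hadoop version of the cluster you plan to run against; adjust it if your environment differs.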
In the project's src/main/resources directory, create a new file named "log4j2.xml" and add the following content:
<?xml version="1.0" encoding="UTF-8"?>
<Configuration status="error" strict="true" name="XMLConfig">
    <Appenders>
        <!-- Appender of type Console; the name attribute is required -->
        <Appender type="Console" name="STDOUT">
            <!-- Use a PatternLayout; output looks like:
                 [INFO] [2018-01-22 17:34:01][org.test.Console]I'm here -->
            <Layout type="PatternLayout"
                    pattern="[%p] [%d{yyyy-MM-dd HH:mm:ss}][%c{10}]%m%n" />
        </Appender>
    </Appenders>
    <Loggers>
        <!-- additivity is set to false -->
        <Logger name="test" level="info" additivity="false">
            <AppenderRef ref="STDOUT" />
        </Logger>
        <!-- root LoggerConfig settings -->
        <Root level="info">
            <AppenderRef ref="STDOUT" />
        </Root>
    </Loggers>
</Configuration>
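This configuration simply sends log output at INFO level and above to the console, which is convenient when running the job locally from the IDE.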
2. Map-side implementation:
package com.atguigu.hdfs.wordcount;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

import java.io.IOException;

public class WordCountMapper extends Mapper<LongWritable, Text, Text, LongWritable> {

    private Text outKey = new Text();
    private LongWritable outValue = new LongWritable(1);

    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        // The framework passes one line at a time; convert it to a String
        String val = value.toString();
        // Split the line into words on spaces
        String[] words = val.split(" ");
        // Emit (word, 1) for every word in the line
        for (String word : words) {
            outKey.set(word);
            context.write(outKey, outValue);
        }
    }
}
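Note that outKey and outValue are created once as fields and reused for every record, which avoids allocating new Writable objects per word. For an input line such as "hello world hello", this Mapper emits (hello,1), (world,1), (hello,1).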
3. Reduce-side implementation:
package com.atguigu.hdfs.wordcount;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

import java.io.IOException;

public class WordCountReducer extends Reducer<Text, LongWritable, Text, LongWritable> {

    private int sum;
    private LongWritable outVal = new LongWritable();

    @Override
    protected void reduce(Text key, Iterable<LongWritable> values, Context context)
            throws IOException, InterruptedException {
        // Reset the counter for each new key
        sum = 0;
        // Iterate over the values with an enhanced for loop and accumulate the counts
        for (LongWritable count : values) {
            sum += count.get();
        }
        outVal.set(sum);
        context.write(key, outVal);
    }
}
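Because this Reducer's input and output types are identical (Text key, LongWritable value), it can optionally also serve as a combiner to pre-aggregate counts on the map side by additionally calling job.setCombinerClass(WordCountReducer.class) in the driver below; this is not required for correctness, only an optimization.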
4. Driver implementation:
package com.atguigu.hdfs.wordcount;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import java.io.IOException;

public class WordCountDriver {
    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        // Get a Job instance
        Job job = Job.getInstance(new Configuration());
        // Associate this program's jar
        job.setJarByClass(WordCountDriver.class);
        // Associate the Mapper and Reducer classes
        job.setMapperClass(WordCountMapper.class);
        job.setReducerClass(WordCountReducer.class);
        // Declare the map output key/value types
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(LongWritable.class);
        // Declare the final output key/value types
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(LongWritable.class);
        // Set the input and output paths
        FileInputFormat.setInputPaths(job, new Path("E:\\hello.txt"));
        FileOutputFormat.setOutputPath(job, new Path("E:\\outwordcount\\result"));
        // Submit the job and wait for it to finish
        boolean result = job.waitForCompletion(true);
        System.exit(result ? 0 : 1);
    }
}
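As written, the driver hardcodes local Windows paths, so the job is meant to be run directly from the IDE in local mode; note that the output directory must not already exist, or the job will fail. If you later want to submit the same program to a cluster, a common pattern is to read the input and output paths from the command line, i.e. replace the two hardcoded Path arguments with new Path(args[0]) and new Path(args[1]), package the project into a jar, and submit it with a command along the lines of the example below (the jar name and HDFS paths are placeholders, not from this project):

hadoop jar wordcount.jar com.atguigu.hdfs.wordcount.WordCountDriver /input/hello.txt /output/wordcount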
