WordCount Hands-On Example

1. Environment setup:

Add the following dependencies to the pom.xml file:

<dependencies>
    <dependency>
        <groupId>junit</groupId>
        <artifactId>junit</artifactId>
        <version>4.12</version>
    </dependency>
    <dependency>
        <groupId>org.apache.logging.log4j</groupId>
        <artifactId>log4j-slf4j-impl</artifactId>
        <version>2.12.0</version>
    </dependency>
    <dependency>
        <groupId>org.apache.hadoop</groupId>
        <artifactId>hadoop-client</artifactId>
        <version>3.1.3</version>
    </dependency>
</dependencies>
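For context, the <dependencies> block above sits inside a complete pom.xml; a minimal surrounding skeleton might look like the following (the groupId, artifactId, and version coordinates are placeholders, not from the original post):

<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>
    <!-- Placeholder project coordinates -->
    <groupId>com.atguigu</groupId>
    <artifactId>wordcount-demo</artifactId>
    <version>1.0-SNAPSHOT</version>
    <!-- The <dependencies> block shown above goes here -->
</project>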

 

In the project's src/main/resources directory, create a new file named "log4j2.xml" and fill it with the following:

<?xml version="1.0" encoding="UTF-8"?>
<Configuration status="error" strict="true" name="XMLConfig">
    <Appenders>
        <!-- Appender of type Console; the name attribute is required -->
        <Appender type="Console" name="STDOUT">
            <!-- PatternLayout produces output such as
                 [INFO] [2018-01-22 17:34:01][org.test.Console]I'm here -->
            <Layout type="PatternLayout"
                    pattern="[%p] [%d{yyyy-MM-dd HH:mm:ss}][%c{10}]%m%n" />
        </Appender>
    </Appenders>

    <Loggers>
        <!-- Additivity is set to false -->
        <Logger name="test" level="info" additivity="false">
            <AppenderRef ref="STDOUT" />
        </Logger>

        <!-- Root logger configuration -->
        <Root level="info">
            <AppenderRef ref="STDOUT" />
        </Root>
    </Loggers>
</Configuration>
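To verify the configuration is picked up, a minimal sketch (the class name and message are placeholders, not part of the original setup): with the log4j-slf4j-impl dependency above on the classpath, SLF4J calls are routed to Log4j2 and the message should print in the pattern configured above.

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

// Hypothetical smoke-test class: logs one INFO line through SLF4J,
// which log4j-slf4j-impl bridges to the Log4j2 configuration above.
public class LogCheck {
    public static void main(String[] args) {
        Logger logger = LoggerFactory.getLogger(LogCheck.class);
        logger.info("log4j2.xml is active");
    }
}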

 

2. Map-side implementation:

package com.atguigu.hdfs.wordcount;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

import java.io.IOException;

public class WordCountMapper extends Mapper<LongWritable, Text, Text, LongWritable> {
    private Text outKey = new Text();
    private LongWritable outValue = new LongWritable(1);

    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        // Read the line and convert it to a String
        String val = value.toString();
        // Split the line into words
        String[] words = val.split(" ");
        // Emit a (word, 1) pair for each word
        for (String word : words) {
            outKey.set(word);
            context.write(outKey, outValue);
        }
    }
}
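To see what the mapper emits, here is a standalone sketch of the same split-and-emit logic (the sample line is hypothetical and the class is not part of the job):

public class MapperSplitDemo {
    public static void main(String[] args) {
        // Hypothetical input line; mirrors val.split(" ") in the mapper above
        String line = "hello world hello";
        for (String word : line.split(" ")) {
            // Stands in for context.write(outKey, outValue)
            System.out.println(word + "\t1");
        }
        // Prints: hello 1, world 1, hello 1 -- the shuffle then groups pairs by key
    }
}

Note that split(" ") produces empty strings between consecutive spaces; splitting on the regex "\\s+" is a common variation when the input may contain tabs or repeated spaces.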

3. Reduce-side implementation:

package com.atguigu.hdfs.wordcount;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

import java.io.IOException;

public class WordCountReducer extends Reducer<Text, LongWritable, Text, LongWritable> {
    // long to match LongWritable and avoid overflow on large counts
    private long sum;
    private LongWritable outVal = new LongWritable();

    @Override
    protected void reduce(Text key, Iterable<LongWritable> values, Context context) throws IOException, InterruptedException {
        sum = 0;
        // Iterate over the grouped values with an enhanced for loop
        for (LongWritable count : values) {
            sum += count.get();
        }
        outVal.set(sum);
        context.write(key, outVal);
    }
}
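For intuition: reduce() is invoked once per distinct key, with all of that key's counts delivered together. A standalone sketch of the same summation (the key and values are hypothetical, standing in for what the shuffle would deliver):

import java.util.Arrays;
import java.util.List;

public class ReducerSumDemo {
    public static void main(String[] args) {
        // Hypothetical grouped values the shuffle would deliver for key "hello"
        List<Long> values = Arrays.asList(1L, 1L, 1L);
        long sum = 0;
        for (long count : values) {
            sum += count;
        }
        // Stands in for context.write(key, outVal); prints: hello 3
        System.out.println("hello\t" + sum);
    }
}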

4. Driver implementation:

package com.atguigu.hdfs.wordcount;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import java.io.IOException;

public class WordCountDriver {
    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        // Get the Job object
        Job job = Job.getInstance(new Configuration());
        // Associate this program's jar
        job.setJarByClass(WordCountDriver.class);
        // Associate the mapper and reducer
        job.setMapperClass(WordCountMapper.class);
        job.setReducerClass(WordCountReducer.class);
        // Declare the map output key/value types
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(LongWritable.class);
        // Declare the final output key/value types
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(LongWritable.class);
        // Set the input and output paths
        FileInputFormat.setInputPaths(job, new Path("E:\\hello.txt"));
        FileOutputFormat.setOutputPath(job, new Path("E:\\outwordcount\\result"));
        // Submit the job and wait for completion
        boolean result = job.waitForCompletion(true);
        System.exit(result ? 0 : 1);
    }
}
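Two optional tweaks, not in the original driver, are common with WordCount. First, because the summation is associative and commutative, the reducer can also be registered as a combiner to pre-aggregate counts on the map side and shrink shuffle traffic:

// Optional: reuse the reducer as a map-side combiner (safe here because
// the reduce logic is a pure, order-independent sum)
job.setCombinerClass(WordCountReducer.class);

Second, MapReduce refuses to start if the output directory already exists, so a guard like this sketch is often placed before waitForCompletion (requires import org.apache.hadoop.fs.FileSystem;):

// Hypothetical guard: delete a stale output directory before submitting
Path out = new Path("E:\\outwordcount\\result");
FileSystem fs = FileSystem.get(job.getConfiguration());
if (fs.exists(out)) {
    // true = delete recursively
    fs.delete(out, true);
}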

