• 博客园logo
  • 会员
  • 众包
  • 新闻
  • 博问
  • 闪存
  • 赞助商
  • HarmonyOS
  • Chat2DB
    • 搜索
      所有博客
    • 搜索
      当前博客
  • 写随笔 我的博客 短消息 简洁模式
    用户头像
    我的博客 我的园子 账号设置 会员中心 简洁模式 ... 退出登录
    注册 登录
jacklee404
Never Stop!
博客园    首页    新随笔    联系   管理    订阅  订阅
MapReduce练习1

MapReduce作业

数据如下图:

image-20230416213219722

AQ@F5JQQDO6@_BH_YATL1QJ

DeptCount.java:

package edu.sugon;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;

import java.io.IOException;
import java.time.LocalDate;
import java.util.Iterator;

import edu.sugon.DeptMapperOut;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

/**
 * MapReduce job that aggregates comma-separated employee rows per group key
 * (column 2 of each row, presumably the department - confirm against emp.csv).
 *
 * <p>For every key the job emits the total and average salary, the name of the
 * highest-paid person, the maximum and average age, and the name of the oldest
 * person (see {@link DeptOut#toString()} for the output layout).
 */
public class DeptCount {
    /** Zero-based column holding the person's name. */
    private static final int NAME_INDEX = 1;
    /** Zero-based column used as the grouping key. */
    private static final int DEPT_INDEX = 2;
    /** First numeric column added into the salary. */
    private static final int FIRST_PAY_INDEX = 3;
    /** Column holding a slash-separated date whose first component is a year. */
    private static final int DATE_INDEX = 4;
    /** Second numeric column added into the salary. */
    private static final int SECOND_PAY_INDEX = 5;
    /** Minimum number of columns a row must have to be processed. */
    private static final int MIN_COLUMNS = 6;

    private static final String COUNTER_GROUP = "DeptCount";
    private static final String MALFORMED_ROWS = "MALFORMED_ROWS";

    private static final String DEFAULT_INPUT = "hdfs://192.168.10.100:9000/2021030541035/data/emp.csv";
    private static final String DEFAULT_OUTPUT = "hdfs://192.168.10.100:9000/2021030541035/data/output";

    /**
     * Returns the number of calendar years between {@code year} and the current year.
     *
     * <p>Only the year component is considered, so the result depends on the date
     * the job runs and ignores month and day.
     *
     * @param year the starting year
     * @return current year minus {@code year}
     */
    public static int calcAge(int year) {
        return LocalDate.now().getYear() - year;
    }

    /** Parses a numeric field, treating a blank field as zero. */
    private static double parseAmount(String field) {
        String trimmed = field.trim();
        return trimmed.isEmpty() ? 0 : Double.parseDouble(trimmed);
    }

    /** Extracts the leading year from a {@code year/...} date field. */
    private static int parseYear(String date) {
        String trimmed = date.trim();
        int slash = trimmed.indexOf('/');
        return Integer.parseInt(slash < 0 ? trimmed : trimmed.substring(0, slash));
    }

    /**
     * Turns one CSV row into a (group key, {@link DeptMapperOut}) pair carrying
     * the person's name, summed salary and age.
     */
    static class DeptMapper extends Mapper<LongWritable, Text, Text, DeptMapperOut> {
        @Override
        protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
            // A negative limit keeps trailing empty fields; plain split(",") drops them,
            // which made rows ending in an empty column fail on the last index.
            String[] attrs = value.toString().split(",", -1);
            if (attrs.length < MIN_COLUMNS) {
                context.getCounter(COUNTER_GROUP, MALFORMED_ROWS).increment(1);
                return;
            }

            int age;
            double salary;
            try {
                age = calcAge(parseYear(attrs[DATE_INDEX]));
                salary = parseAmount(attrs[FIRST_PAY_INDEX]) + parseAmount(attrs[SECOND_PAY_INDEX]);
            } catch (NumberFormatException e) {
                // Header lines or corrupt rows: count and skip instead of failing the task.
                context.getCounter(COUNTER_GROUP, MALFORMED_ROWS).increment(1);
                return;
            }

            DeptMapperOut out = new DeptMapperOut();
            out.init(attrs[NAME_INDEX], salary, age);
            context.write(new Text(attrs[DEPT_INDEX]), out);
        }
    }

    /**
     * Folds all records of one group key into a single {@link DeptOut} summary.
     */
    static class DeptReducer extends Reducer<Text, DeptMapperOut, Text, DeptOut> {
        @Override
        protected void reduce(Text key, Iterable<DeptMapperOut> values, Context context) throws IOException, InterruptedException {
            String maxAgeName = null;
            String maxSalaryName = null;
            double totSalary = 0;
            double ageSum = 0;
            double maxSalary = 0;
            int maxAge = 0;
            int cnt = 0;

            for (DeptMapperOut record : values) {
                // Copy the values out immediately; only primitives and the immutable
                // name string are retained across iterations.
                double salary = record.getSalary();
                int age = record.getAge();

                totSalary += salary;
                ageSum += age;

                // Seed the maxima from the first record so the names are never left
                // null when every salary or age is zero or negative.
                if (cnt == 0 || salary > maxSalary) {
                    maxSalary = salary;
                    maxSalaryName = record.getName();
                }
                if (cnt == 0 || age > maxAge) {
                    maxAge = age;
                    maxAgeName = record.getName();
                }
                cnt++;
            }

            if (cnt == 0) {
                // Nothing to summarize; avoid emitting NaN averages.
                return;
            }

            DeptOut result = new DeptOut();
            result.init(maxAgeName, maxSalaryName, totSalary, totSalary / cnt, maxAge, ageSum / cnt);
            context.write(key, result);
        }
    }

    /**
     * Configures and runs the job.
     *
     * @param args optional overrides: {@code args[0]} is the input path and
     *             {@code args[1]} the output path; the built-in HDFS locations
     *             are used when they are omitted
     * @throws Exception if the job cannot be configured or submitted
     */
    public static void main(String[] args) throws Exception {
        System.setProperty("HADOOP_USER_NAME", "hadoop");
        System.setProperty("hadoop.home.dir", "D:\\Program Files (x86)\\hadoop-3.1.1");

        String input = args.length > 0 ? args[0] : DEFAULT_INPUT;
        String output = args.length > 1 ? args[1] : DEFAULT_OUTPUT;

        // Job.getInstance replaces the deprecated Job(Configuration) constructor.
        Job job = Job.getInstance(new Configuration());
        // Class that contains the main method.
        job.setJarByClass(DeptCount.class);
        // Mapper class and its output types.
        job.setMapperClass(DeptCount.DeptMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(DeptMapperOut.class);
        // Reducer class and the final output types.
        job.setReducerClass(DeptCount.DeptReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(DeptOut.class);
        // Input file and output directory.
        FileInputFormat.setInputPaths(job, new Path(input));
        FileOutputFormat.setOutputPath(job, new Path(output));

        // Report job failure through the process exit code.
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}

DeptOut.java:

package edu.sugon;

import org.apache.hadoop.io.Writable;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

/**
 * Per-group summary value: total and average salary, the highest-paid person's
 * name, the maximum and average age, and the oldest person's name.
 *
 * <p>Serialized field order is: maxAgeName, maxSalaryName, totSalary,
 * avgSalary, avgAge, maxAge. {@link #write} and {@link #readFields} must stay
 * in that order.
 */
public class DeptOut implements Writable {
    private String maxAgeName;
    private String maxSalaryName;
    private double totSalary;
    private double avgSalary;
    private double avgAge;
    private int maxAge;

    /**
     * Sets every field in one call.
     *
     * @param maxAgeName    name of the person with the maximum age
     * @param maxSalaryName name of the person with the maximum salary
     * @param totSalary     sum of salaries
     * @param avgSalary     average salary
     * @param maxAge        maximum age
     * @param avgAge        average age
     */
    public void init(String maxAgeName, String maxSalaryName, double totSalary, double avgSalary, int maxAge, double avgAge) {
        this.totSalary = totSalary;
        this.avgSalary = avgSalary;
        this.maxSalaryName = maxSalaryName;
        this.maxAge = maxAge;
        this.avgAge = avgAge;
        this.maxAgeName = maxAgeName;
    }

    // ---- salary figures ----

    public double getTotSalary() {
        return totSalary;
    }

    public void setTotSalary(double totSalary) {
        this.totSalary = totSalary;
    }

    public double getAvgSalary() {
        return avgSalary;
    }

    public void setAvgSalary(double avgSalary) {
        this.avgSalary = avgSalary;
    }

    public String getMaxSalaryName() {
        return maxSalaryName;
    }

    public void setMaxSalaryName(String maxSalaryName) {
        this.maxSalaryName = maxSalaryName;
    }

    // ---- age figures ----

    public int getMaxAge() {
        return maxAge;
    }

    public void setMaxAge(int maxAge) {
        this.maxAge = maxAge;
    }

    public double getAvgAge() {
        return avgAge;
    }

    public void setAvgAge(double avgAge) {
        this.avgAge = avgAge;
    }

    public String getMaxAgeName() {
        return maxAgeName;
    }

    public void setMaxAgeName(String maxAgeName) {
        this.maxAgeName = maxAgeName;
    }

    // ---- Writable ----

    /** Writes the two names, then the three doubles, then the maximum age. */
    @Override
    public void write(DataOutput dataOutput) throws IOException {
        dataOutput.writeUTF(this.maxAgeName);
        dataOutput.writeUTF(this.maxSalaryName);
        dataOutput.writeDouble(this.totSalary);
        dataOutput.writeDouble(this.avgSalary);
        dataOutput.writeDouble(this.avgAge);
        dataOutput.writeInt(this.maxAge);
    }

    /** Reads the fields back in the same order {@link #write} emitted them. */
    @Override
    public void readFields(DataInput dataInput) throws IOException {
        this.maxAgeName = dataInput.readUTF();
        this.maxSalaryName = dataInput.readUTF();
        this.totSalary = dataInput.readDouble();
        this.avgSalary = dataInput.readDouble();
        this.avgAge = dataInput.readDouble();
        this.maxAge = dataInput.readInt();
    }

    /**
     * Formats the summary as
     * {@code totSalary,avgSalary,maxSalaryName,maxAge,avgAge,maxAgeName}.
     */
    @Override
    public String toString() {
        StringBuilder line = new StringBuilder();
        line.append(totSalary).append(",");
        line.append(avgSalary).append(",");
        line.append(maxSalaryName).append(",");
        line.append(maxAge).append(",");
        line.append(avgAge).append(",");
        line.append(maxAgeName);
        return line.toString();
    }
}

DeptMapperOut.java:

package edu.sugon;

import org.apache.hadoop.io.Writable;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

/**
 * Intermediate map-output value describing one person: name, salary and age.
 *
 * <p>Serialized field order is: name, salary, age. {@link #write} and
 * {@link #readFields} must stay in that order.
 */
public class DeptMapperOut implements Writable {
    private String name;
    private double salary;
    private int age;

    /**
     * Sets every field in one call.
     *
     * @param name   the person's name
     * @param salary the person's salary
     * @param age    the person's age
     */
    public void init(String name, double salary, int age) {
        this.age = age;
        this.salary = salary;
        this.name = name;
    }

    public String getName() {
        return name;
    }

    public void setName(String name) {
        this.name = name;
    }

    public double getSalary() {
        return salary;
    }

    public void setSalary(double salary) {
        this.salary = salary;
    }

    public int getAge() {
        return age;
    }

    public void setAge(int age) {
        this.age = age;
    }

    /** Writes the name, then the salary, then the age. */
    @Override
    public void write(DataOutput dataOutput) throws IOException {
        dataOutput.writeUTF(this.name);
        dataOutput.writeDouble(this.salary);
        dataOutput.writeInt(this.age);
    }

    /** Reads the fields back in the same order {@link #write} emitted them. */
    @Override
    public void readFields(DataInput dataInput) throws IOException {
        this.name = dataInput.readUTF();
        this.salary = dataInput.readDouble();
        this.age = dataInput.readInt();
    }
}
posted on 2023-04-16 21:37  Jack404  阅读(35)  评论(0)    收藏  举报
刷新页面返回顶部
博客园  ©  2004-2025
浙公网安备 33010602011771号 浙ICP备2021040463号-3