MapReduce作业
数据如下图:
DeptCount.java:
package edu.sugon;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import java.io.IOException;
import java.time.LocalDate;
import java.util.Iterator;
import edu.sugon.DeptMapperOut;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
/**
 * MapReduce job that aggregates comma-separated employee rows by the value in column 2
 * (presumably the department, going by the class names -- confirm against the data file).
 *
 * <p>Columns read from each row: 1 = name, 2 = grouping key, 3 and 5 = numeric amounts that are
 * added together as the employee's pay, 4 = a {@code /}-separated date whose first component is
 * the year. For every key the reducer emits the total and average pay, the name of the
 * highest-paid employee, the highest and average age, and the name of the oldest employee.
 */
public class DeptCount {
    /** Input used when no command-line argument is supplied. */
    private static final String DEFAULT_INPUT =
            "hdfs://192.168.10.100:9000/2021030541035/data/emp.csv";
    /** Output directory used when fewer than two command-line arguments are supplied. */
    private static final String DEFAULT_OUTPUT =
            "hdfs://192.168.10.100:9000/2021030541035/data/output";
    /** Columns 0..4 must exist; column 5 is optional and treated as 0 when absent. */
    private static final int MIN_FIELDS = 5;
    private static final String COUNTER_GROUP = "DeptCount";
    private static final String SKIPPED_ROWS = "SKIPPED_SHORT_ROWS";

    /**
     * Returns the number of calendar years between {@code year} and the current year.
     *
     * @param year a four-digit year
     * @return current year minus {@code year}
     */
    public static int calcAge(int year) {
        LocalDate currentDate = LocalDate.now();
        return currentDate.getYear() - year;
    }

    /**
     * Parses the numeric column at {@code index}.
     *
     * @return the parsed value, or 0 when the column is missing or blank
     */
    private static double amountAt(String[] attrs, int index) {
        if (index >= attrs.length) {
            return 0;
        }
        String raw = attrs[index].trim();
        return raw.isEmpty() ? 0 : Double.parseDouble(raw);
    }

    /** Turns one CSV row into a (column 2, {@link DeptMapperOut}) pair. */
    static class DeptMapper extends Mapper<LongWritable, Text, Text, DeptMapperOut> {
        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            // Limit -1 keeps trailing empty columns; the default split drops them, which made
            // attrs[5] throw ArrayIndexOutOfBoundsException for rows ending in an empty field.
            String[] attrs = value.toString().split(",", -1);
            if (attrs.length < MIN_FIELDS) {
                // Blank or truncated line: count it instead of failing the whole task.
                context.getCounter(COUNTER_GROUP, SKIPPED_ROWS).increment(1);
                return;
            }
            String year = attrs[4].split("/")[0].trim();
            int age = calcAge(Integer.parseInt(year));
            double salary = amountAt(attrs, 3) + amountAt(attrs, 5);

            DeptMapperOut out = new DeptMapperOut();
            out.init(attrs[1], salary, age);
            context.write(new Text(attrs[2]), out);
        }
    }

    /** Folds all records of one key into a single {@link DeptOut} summary. */
    static class DeptReducer extends Reducer<Text, DeptMapperOut, Text, DeptOut> {
        @Override
        protected void reduce(Text key, Iterable<DeptMapperOut> values, Context context)
                throws IOException, InterruptedException {
            String maxAgeName = null;
            String maxSalaryName = null;
            double totSalary = 0;
            double totAge = 0;
            double maxSalary = 0;
            int maxAge = 0;
            int cnt = 0;

            for (DeptMapperOut out : values) {
                double salary = out.getSalary();
                int age = out.getAge();
                totSalary += salary;
                totAge += age;
                cnt += 1;
                // The null check seeds the maximum from the first record, so a name is always
                // reported even when every salary/age in the group is zero or negative.
                if (maxSalaryName == null || salary > maxSalary) {
                    maxSalaryName = out.getName();
                    maxSalary = salary;
                }
                if (maxAgeName == null || age > maxAge) {
                    maxAgeName = out.getName();
                    maxAge = age;
                }
            }

            double avgSalary = totSalary / cnt;
            double avgAge = totAge / cnt;
            DeptOut out = new DeptOut();
            out.init(maxAgeName, maxSalaryName, totSalary, avgSalary, maxAge, avgAge);
            context.write(key, out);
        }
    }

    /**
     * Configures and runs the job, then exits with 0 on success or 1 on failure.
     *
     * @param args optional: {@code args[0]} overrides the input path and {@code args[1]} the
     *             output directory; the built-in HDFS locations are used when they are absent
     * @throws Exception if the job cannot be set up or submitted
     */
    public static void main(String[] args) throws Exception {
        System.setProperty("HADOOP_USER_NAME", "hadoop");
        System.setProperty("hadoop.home.dir", "D:\\Program Files (x86)\\hadoop-3.1.1");

        String input = args.length > 0 ? args[0] : DEFAULT_INPUT;
        String output = args.length > 1 ? args[1] : DEFAULT_OUTPUT;

        // Job.getInstance replaces the deprecated Job constructor.
        Job job = Job.getInstance(new Configuration());
        // The class that contains main identifies the job jar.
        job.setJarByClass(DeptCount.class);

        // Mapper and its output types.
        job.setMapperClass(DeptCount.DeptMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(DeptMapperOut.class);

        // Reducer and the final output types.
        job.setReducerClass(DeptCount.DeptReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(DeptOut.class);

        // Input file and output directory.
        FileInputFormat.setInputPaths(job, new Path(input));
        FileOutputFormat.setOutputPath(job, new Path(output));

        // Surface job failure to the caller through the process exit status.
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
DeptOut.java:
package edu.sugon;
import org.apache.hadoop.io.Writable;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
/**
 * Writable value holding the per-key summary: total and average salary, the name attached to
 * the highest salary, the highest and average age, and the name attached to the highest age.
 */
public class DeptOut implements Writable {
    private double totSalary;
    private double avgSalary;
    private String maxSalaryName;
    private int maxAge;
    private double avgAge;
    private String maxAgeName;

    /** Sets every field in one call. */
    public void init(String maxAgeName, String maxSalaryName, double totSalary, double avgSalary, int maxAge, double avgAge) {
        setMaxAgeName(maxAgeName);
        setMaxSalaryName(maxSalaryName);
        setTotSalary(totSalary);
        setAvgSalary(avgSalary);
        setAvgAge(avgAge);
        setMaxAge(maxAge);
    }

    /**
     * Serializes the fields in the order: maxAgeName, maxSalaryName, totSalary, avgSalary,
     * avgAge, maxAge. {@link #readFields(DataInput)} reads them back in the same order.
     */
    @Override
    public void write(DataOutput dataOutput) throws IOException {
        dataOutput.writeUTF(this.maxAgeName);
        dataOutput.writeUTF(this.maxSalaryName);
        dataOutput.writeDouble(this.totSalary);
        dataOutput.writeDouble(this.avgSalary);
        dataOutput.writeDouble(this.avgAge);
        dataOutput.writeInt(this.maxAge);
    }

    /** Restores the fields from the layout produced by {@link #write(DataOutput)}. */
    @Override
    public void readFields(DataInput dataInput) throws IOException {
        this.maxAgeName = dataInput.readUTF();
        this.maxSalaryName = dataInput.readUTF();
        this.totSalary = dataInput.readDouble();
        this.avgSalary = dataInput.readDouble();
        this.avgAge = dataInput.readDouble();
        this.maxAge = dataInput.readInt();
    }

    /**
     * Renders the summary as one comma-separated line:
     * totSalary,avgSalary,maxSalaryName,maxAge,avgAge,maxAgeName.
     */
    @Override
    public String toString() {
        StringBuilder line = new StringBuilder();
        line.append(totSalary).append(",");
        line.append(avgSalary).append(",");
        line.append(maxSalaryName).append(",");
        line.append(maxAge).append(",");
        line.append(avgAge).append(",");
        line.append(maxAgeName);
        return line.toString();
    }

    // ---- accessors ----

    public double getTotSalary() {
        return this.totSalary;
    }

    public void setTotSalary(double totSalary) {
        this.totSalary = totSalary;
    }

    public double getAvgSalary() {
        return this.avgSalary;
    }

    public void setAvgSalary(double avgSalary) {
        this.avgSalary = avgSalary;
    }

    public String getMaxSalaryName() {
        return this.maxSalaryName;
    }

    public void setMaxSalaryName(String maxSalaryName) {
        this.maxSalaryName = maxSalaryName;
    }

    public int getMaxAge() {
        return this.maxAge;
    }

    public void setMaxAge(int maxAge) {
        this.maxAge = maxAge;
    }

    public double getAvgAge() {
        return this.avgAge;
    }

    public void setAvgAge(double avgAge) {
        this.avgAge = avgAge;
    }

    public String getMaxAgeName() {
        return this.maxAgeName;
    }

    public void setMaxAgeName(String maxAgeName) {
        this.maxAgeName = maxAgeName;
    }
}
DeptMapperOut.java:
package edu.sugon;
import org.apache.hadoop.io.Writable;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
/**
 * Writable value emitted by the mapper for a single employee: name, salary and age.
 */
public class DeptMapperOut implements Writable {
    private String name;
    private double salary;
    private int age;

    /** Sets all three fields in one call. */
    public void init(String name, double salary, int age) {
        setName(name);
        setSalary(salary);
        setAge(age);
    }

    /**
     * Serializes the fields in the order: name, salary, age.
     * {@link #readFields(DataInput)} reads them back in the same order.
     */
    @Override
    public void write(DataOutput dataOutput) throws IOException {
        dataOutput.writeUTF(this.name);
        dataOutput.writeDouble(this.salary);
        dataOutput.writeInt(this.age);
    }

    /** Restores the fields from the layout produced by {@link #write(DataOutput)}. */
    @Override
    public void readFields(DataInput dataInput) throws IOException {
        this.name = dataInput.readUTF();
        this.salary = dataInput.readDouble();
        this.age = dataInput.readInt();
    }

    // ---- accessors ----

    public String getName() {
        return this.name;
    }

    public void setName(String name) {
        this.name = name;
    }

    public double getSalary() {
        return this.salary;
    }

    public void setSalary(double salary) {
        this.salary = salary;
    }

    public int getAge() {
        return this.age;
    }

    public void setAge(int age) {
        this.age = age;
    }
}