1.Access.java // custom data type for Hadoop
package com.mr.access;
import org.apache.hadoop.io.Writable;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
/**
* A custom composite data type.
* 1) Per Hadoop's conventions, it must implement the Writable interface.
* 2) Per Hadoop's conventions, it must implement the write and readFields methods.
* 3) It must define a default (no-argument) constructor.
*/
public class Access implements Writable {
private String phone;
private long up;
private long down;
private long sum;
@Override
public void write(DataOutput out) throws IOException {
out.writeUTF(phone); // fields written first here must be read back first in readFields
out.writeLong(up);
out.writeLong(down);
out.writeLong(sum);
}
@Override
public void readFields(DataInput in) throws IOException {
this.phone = in.readUTF(); // read fields in the same order they were written
this.up = in.readLong();
this.down = in.readLong();
this.sum = in.readLong();
}
@Override
public String toString() {
return phone +
"," + up +
"," + down +
"," + sum;
}
public Access() {
}
public Access(String phone, long up, long down) {
this.phone = phone;
this.up = up;
this.down = down;
this.sum = up + down;
}
public void setPhone(String phone) {
this.phone = phone;
}
public void setUp(long up) {
this.up = up;
}
public void setDown(long down) {
this.down = down;
}
public void setSum(long sum) {
this.sum = sum;
}
public String getPhone() {
return phone;
}
public long getUp() {
return up;
}
public long getDown() {
return down;
}
public long getSum() {
return sum;
}
}
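Because write and readFields must agree on field order, a quick local serialize/deserialize round trip is an easy sanity check for this class. The following standalone main is a hypothetical sketch, not part of the original project; the sample phone number and traffic values are made up:
package com.mr.access;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;
public class AccessRoundTripCheck {
    public static void main(String[] args) throws IOException {
        // Hypothetical sample values, used only to exercise the serialization order
        Access original = new Access("13800000000", 100L, 200L);
        ByteArrayOutputStream bytes = new ByteArrayOutputStream();
        original.write(new DataOutputStream(bytes));
        Access copy = new Access();
        copy.readFields(new DataInputStream(new ByteArrayInputStream(bytes.toByteArray())));
        System.out.println(copy); // expected output: 13800000000,100,200,300
    }
}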
2.AccessMapper.java
package com.mr.access;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import java.io.IOException;
public class AccessMapper extends Mapper<LongWritable, Text, Text, Access> {
@Override
protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
String[] lines = value.toString().split(" ");
String phone = lines[0]; // extract the phone number
long up = Long.parseLong(lines[lines.length-3]); // extract the upstream traffic
long down = Long.parseLong(lines[lines.length-2]); // extract the downstream traffic
context.write(new Text(phone), new Access(phone, up, down));
}
}
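The mapper assumes space-separated records with the phone number in the first field and the upstream/downstream traffic in the third-to-last and second-to-last fields. The snippet below is a hypothetical standalone check of that field extraction; the sample record is made up and only illustrates the assumed layout:
package com.mr.access;
public class AccessMapperFieldCheck {
    public static void main(String[] args) {
        // Made-up record: only the field positions matter, not the actual values
        String line = "13700000001 example.host.com 2481 24681 200";
        String[] fields = line.split(" ");
        String phone = fields[0];                              // 13700000001
        long up = Long.parseLong(fields[fields.length - 3]);   // 2481
        long down = Long.parseLong(fields[fields.length - 2]); // 24681
        System.out.println(new Access(phone, up, down));       // 13700000001,2481,24681,27162
    }
}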
3.AccessReducer.java
package com.mr.access;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.io.NullWritable;
import java.io.IOException;
public class AccessReducer extends Reducer<Text, Access, NullWritable, Access>{
//public class AccessReducer extends Reducer<Text, Access, Text, Access>{
@Override
protected void reduce(Text key, Iterable<Access> values, Context context) throws IOException, InterruptedException {
long ups = 0;
long downs = 0;
for (Access access:values){
ups += access.getUp();
downs += access.getDown();
}
context.write(NullWritable.get(), new Access(key.toString(), ups, downs));
// context.write(new Text(key.toString()), new Access(key.toString(), ups, downs));
}
}
4.AccessPartitioner.java // custom number of reducers and partitioning rule
package com.mr.access;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Partitioner;
/**
* Custom MapReduce partitioning rule
*/
public class AccessPartitioner extends Partitioner<Text, Access>{
/**
* @param phone the phone number (map output key)
*/
@Override
public int getPartition(Text phone, Access access, int numReduceTasks) {
if(phone.toString().startsWith("13")) {
return 0;
} else if(phone.toString().startsWith("15")) {
return 1;
} else {
return 2;
}
}
}
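Because getPartition can return 0, 1, or 2, the job needs at least three reduce tasks (the driver below calls job.setNumReduceTasks(3)); with fewer reducers the framework would reject the out-of-range partition index at runtime. A small standalone check of the rule, using made-up phone numbers, might look like this (hypothetical helper, not part of the original project):
package com.mr.access;
import org.apache.hadoop.io.Text;
public class AccessPartitionerCheck {
    public static void main(String[] args) {
        AccessPartitioner partitioner = new AccessPartitioner();
        // Made-up phone numbers covering each branch of the rule
        String[] phones = {"13700000000", "15900000000", "18600000000"};
        for (String phone : phones) {
            int partition = partitioner.getPartition(new Text(phone), new Access(phone, 0, 0), 3);
            System.out.println(phone + " -> partition " + partition); // expected: 0, 1, 2
        }
    }
}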
5.AccessLocalApp.java
package com.mr.access;
import com.imooc.bigdata.hadoop.mr.wc.FileUtilsDelete;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
public class AccessLocalApp {
public static void main(String[] args) throws Exception {
Configuration configuration = new Configuration();
Job job = Job.getInstance(configuration);
job.setJarByClass(AccessLocalApp.class);
job.setMapperClass(AccessMapper.class);
job.setReducerClass(AccessReducer.class);
// To use a Combiner, add the setting below. Note that AccessReducer emits a NullWritable key,
// so it cannot serve as a Combiner unchanged: a Combiner must output the same key/value
// types as the map output (Text, Access).
// job.setCombinerClass(AccessReducer.class);
// Set the custom partitioner
job.setPartitionerClass(AccessPartitioner.class);
// Set the number of reduce tasks
job.setNumReduceTasks(3);
job.setMapOutputKeyClass(Text.class);
job.setMapOutputValueClass(Access.class);
job.setOutputKeyClass(NullWritable.class);
job.setOutputValueClass(Access.class);
FileUtilsDelete.deleteDirectory("access/output"); // delete the output directory first if it already exists
FileInputFormat.setInputPaths(job, new Path("access/input"));
FileOutputFormat.setOutputPath(job, new Path("access/output"));
job.waitForCompletion(true);
}
}
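FileUtilsDelete is imported from the earlier word-count package, but its source is not listed here. A minimal sketch of such a helper, assuming it only needs to remove a local output directory recursively, could look like the following (hypothetical implementation, not the original class):
package com.imooc.bigdata.hadoop.mr.wc;
import java.io.File;
public class FileUtilsDelete {
    // Recursively delete a local directory if it exists; do nothing otherwise
    public static void deleteDirectory(String path) {
        delete(new File(path));
    }
    private static void delete(File file) {
        if (!file.exists()) {
            return;
        }
        File[] children = file.listFiles();
        if (children != null) {
            for (File child : children) {
                delete(child);
            }
        }
        file.delete();
    }
}
With three reduce tasks and the partitioner above, the job should produce three files under access/output (part-r-00000, part-r-00001 and part-r-00002), one per partition. Since the reducer's output key is NullWritable, each output line contains only the value's toString(), i.e. phone,up,down,sum.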