partition
Before a spill, the data must be partitioned and sorted: each (k, v) pair in the ring buffer is hashed to a partition value, and pairs with the same partition value belong to the same partition. The data in the ring buffer is then sorted in ascending order on two keys, partition value first and key second, so within each partition records are ordered by key.
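When no Partitioner is set explicitly, the partition value comes from Hadoop's default HashPartitioner, which hashes the key and takes the remainder by the number of reduce tasks. Below is a minimal sketch of that default logic; the class name DefaultStylePartitioner is made up for illustration.

import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Partitioner;

// Sketch of the default hash-based partitioning: the same computation as Hadoop's HashPartitioner.
public class DefaultStylePartitioner<V> extends Partitioner<Text, V> {
    @Override
    public int getPartition(Text key, V value, int numReduceTasks) {
        // Clear the sign bit so the result is non-negative, then mod by the number of reducers:
        // all records with the same key end up in the same partition.
        return (key.hashCode() & Integer.MAX_VALUE) % numReduceTasks;
    }
}

The custom MyPartitioner below replaces this hash with an explicit mapping from phone-number prefix to partition index, so the routing is deterministic and easy to verify.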
package com.atguigu.partition;

/* Serialize each phone's upFlow, downFlow and sumFlow, and partition the output by phone prefix */

import com.atguigu.flow.flowBean;
import com.atguigu.flow.flowMapper;
import com.atguigu.flow.flowReducer;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class partitionerDriver {
    public static void main(String[] args) throws Exception {
        Job job = Job.getInstance(new Configuration());
        job.setJarByClass(partitionerDriver.class);
        job.setMapperClass(flowMapper.class);
        job.setReducerClass(flowReducer.class);

        // Set the number of reduce tasks (one output file per partition)
        job.setNumReduceTasks(5);
        // Set the custom Partitioner class
        job.setPartitionerClass(MyPartitioner.class);

        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(flowBean.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(flowBean.class);

        FileInputFormat.addInputPath(job, new Path("E:\\phone2.txt"));
        FileOutputFormat.setOutputPath(job, new Path("E:\\out"));

        boolean b = job.waitForCompletion(true);
        System.exit(b ? 0 : 1);
    }
}

/*
Input:
13610009496,300,200,100
13710009496,200,400,100
13800094960,200,100,100
13810009496,300,100,100
13910009496,600,500,100
15210009496,300,500,100

Output:
part-r-00000
13610009496	upFlow=300, downFlow=200, sumFlow=500
part-r-00001
13710009496	upFlow=200, downFlow=400, sumFlow=600
part-r-00002
13800094960	upFlow=200, downFlow=100, sumFlow=300
13810009496	upFlow=300, downFlow=100, sumFlow=400
part-r-00003
13910009496	upFlow=600, downFlow=500, sumFlow=1100
part-r-00004
15210009496	upFlow=300, downFlow=500, sumFlow=800
*/
package com.atguigu.partition;

import com.atguigu.flow.flowBean;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Partitioner;

public class MyPartitioner extends Partitioner<Text, flowBean> {
    @Override
    public int getPartition(Text text, flowBean flowBean, int numPartitions) {
        // Route records to partitions by the first three digits of the phone number
        String phone = text.toString();
        switch (phone.substring(0, 3)) {
            case "136": return 0;
            case "137": return 1;
            case "138": return 2;
            case "139": return 3;
            default:    return 4;
        }
    }
}
package com.atguigu.flow;

import org.apache.hadoop.io.Writable;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

public class flowBean implements Writable {
    private long upFlow;
    private long downFlow;
    private long sumFlow;

    @Override
    public String toString() {
        return "upFlow=" + upFlow + ", downFlow=" + downFlow + ", sumFlow=" + sumFlow;
    }

    public void set(long upFlow, long downFlow) {
        this.upFlow = upFlow;
        this.downFlow = downFlow;
        this.sumFlow = upFlow + downFlow;
    }

    public void setUpFlow(long upFlow) { this.upFlow = upFlow; }
    public void setDownFlow(long downFlow) { this.downFlow = downFlow; }
    public void setSumFlow(long sumFlow) { this.sumFlow = sumFlow; }
    public long getUpFlow() { return upFlow; }
    public long getDownFlow() { return downFlow; }
    public long getSumFlow() { return sumFlow; }

    // Serialization: hand the fields over to the framework
    public void write(DataOutput out) throws IOException {
        out.writeLong(upFlow);
        out.writeLong(downFlow);
        out.writeLong(sumFlow);
    }

    // Deserialization: read the fields back from the framework, in the same order they were written
    public void readFields(DataInput in) throws IOException {
        upFlow = in.readLong();
        downFlow = in.readLong();
        sumFlow = in.readLong();
    }
}
package com.atguigu.flow;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import java.io.IOException;

public class flowMapper extends Mapper<LongWritable, Text, Text, flowBean> {
    private Text phone = new Text();
    private flowBean flow = new flowBean();

    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        // Each input line: phone,upFlow,downFlow,...
        String line = value.toString();
        String[] fields = line.split(",");
        phone.set(fields[0]);
        long upFlow = Long.parseLong(fields[1]);
        long downFlow = Long.parseLong(fields[2]);
        flow.set(upFlow, downFlow);
        context.write(phone, flow);
    }
}
package com.atguigu.flow;

import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
import java.io.IOException;

public class flowReducer extends Reducer<Text, flowBean, Text, flowBean> {
    private flowBean sumFlow = new flowBean();

    @Override
    protected void reduce(Text key, Iterable<flowBean> values, Context context)
            throws IOException, InterruptedException {
        // Accumulate the up and down flow of all records for this phone number
        long sumUpflow = 0;
        long sumDownflow = 0;
        for (flowBean value : values) {
            sumUpflow += value.getUpFlow();
            sumDownflow += value.getDownFlow();
        }
        sumFlow.set(sumUpflow, sumDownflow);
        context.write(key, sumFlow);
    }
}