partition

Before a spill, the data is partitioned and sorted: each (k, v) key-value pair in the circular (in-memory) buffer is assigned a partition number (by hashing the key under the default partitioner), and pairs with the same partition number belong to the same partition. The buffer contents are then sorted in ascending order on two keys, partition number first and key second, so records within the same partition end up ordered by key.
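For reference, when no Partitioner is configured the framework falls back to HashPartitioner (org.apache.hadoop.mapreduce.lib.partition.HashPartitioner). The following is a minimal sketch of that default behavior, assuming standard Hadoop semantics; the class name DefaultHashPartitionerSketch is just an illustrative placeholder:

import org.apache.hadoop.mapreduce.Partitioner;

// Sketch of default hash partitioning: mask hashCode() to a non-negative
// value, then take it modulo the number of reduce tasks, so the result is
// always in the range [0, numReduceTasks).
public class DefaultHashPartitionerSketch<K, V> extends Partitioner<K, V> {

    @Override
    public int getPartition(K key, V value, int numReduceTasks) {
        return (key.hashCode() & Integer.MAX_VALUE) % numReduceTasks;
    }
}

With job.setNumReduceTasks(5) and this default behavior, phone numbers would be scattered across the five reducers by hash; the MyPartitioner class shown below replaces it with prefix-based routing so each number segment lands in its own output file.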

package com.atguigu.partition;

/*
Serialization:
  compute upFlow, downFlow and sumFlow for each phone number
Partitioning:
  write each phone-number prefix to its own reduce partition
 */
import com.atguigu.flow.flowBean;
import com.atguigu.flow.flowMapper;
import com.atguigu.flow.flowReducer;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class partitionerDriver {

    public static void main(String[] args) throws Exception {
        Job job = Job.getInstance(new Configuration());

        job.setJarByClass(partitionerDriver.class);


        job.setMapperClass(flowMapper.class);
        job.setReducerClass(flowReducer.class);

        // Set the number of reduce tasks; it must be at least 5 to cover every partition MyPartitioner can return
        job.setNumReduceTasks(5);

        // Set the custom Partitioner class
        job.setPartitionerClass(MyPartitioner.class);


        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(flowBean.class);

        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(flowBean.class);

        FileInputFormat.addInputPath(job,new Path("E:\\phone2.txt"));
        FileOutputFormat.setOutputPath(job,new Path("E:\\out"));

        boolean b = job.waitForCompletion(true);
        System.exit(b?0:1);
    }
}

/*
Input: phone,upFlow,downFlow,... (the mapper reads only the first three fields)
13610009496,300,200,100
13710009496,200,400,100
13800094960,200,100,100
13810009496,300,100,100
13910009496,600,500,100
15210009496,300,500,100

Output (five files, one per partition):
part-r-00000
13610009496    upFlow=300, downFlow=200, sumFlow=500
part-r-00001
13710009496    upFlow=200, downFlow=400, sumFlow=600
part-r-00002
13800094960    upFlow=200, downFlow=100, sumFlow=300
13810009496    upFlow=300, downFlow=100, sumFlow=400
part-r-00003
13910009496    upFlow=600, downFlow=500, sumFlow=1100
part-r-00004
15210009496    upFlow=300, downFlow=500, sumFlow=800
 */
 */
package com.atguigu.partition;

import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Partitioner;
import com.atguigu.flow.flowBean;

public class MyPartitioner extends Partitioner <Text, flowBean>{


    // Route each record by the first three digits of the phone number; the
    // return value must lie in [0, numPartitions), so the driver must set
    // at least 5 reduce tasks.
    @Override
    public int getPartition(Text text, flowBean flowBean, int numPartitions) {
        String phone = text.toString();

        switch(phone.substring(0,3)){
            case "136":
                return 0;
            case "137":
                return 1;
            case "138":
                return 2;
            case "139":
                return 3;
            default:
                return 4;

        }
    }
}
package com.atguigu.flow;

import org.apache.hadoop.io.Writable;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

public class flowBean implements Writable {
    private long upFlow;
    private long downFlow;
    private long sumFlow;

    @Override
    public String toString(){
        return "upFlow=" + upFlow + ", downFlow=" + downFlow + ", sumFlow=" + sumFlow ;
    }

    public void set(long upFlow,long downFlow){
        this.upFlow=upFlow;
        this.downFlow=downFlow;
        this.sumFlow=upFlow+downFlow;
    }

    public void setUpFlow(long upFlow) {
        this.upFlow = upFlow;
    }

    public void setDownFlow(long downFlow) {
        this.downFlow = downFlow;
    }

    public void setSumFlow(long sumFlow) {
        this.sumFlow = sumFlow;
    }

    public long getUpFlow() {
        return upFlow;
    }

    public long getDownFlow() {
        return downFlow;
    }

    public long getSumFlow() {
        return sumFlow;
    }

    // Serialization: write the fields out to the framework
    @Override
    public void write(DataOutput out) throws IOException {
        out.writeLong(upFlow);
        out.writeLong(downFlow);
        out.writeLong(sumFlow);
    }
    // Deserialization: read the fields back in the same order they were written
    @Override
    public void readFields(DataInput in) throws IOException {
        upFlow = in.readLong();
        downFlow = in.readLong();
        sumFlow = in.readLong();
    }
}
package com.atguigu.flow;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

import java.io.IOException;

public class flowMapper extends Mapper<LongWritable,Text, Text, flowBean> {

    private Text phone = new Text();
    private flowBean flow = new flowBean();

    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        String line = value.toString();
        String[] fields = line.split(",");


        phone.set(fields[0]);
        long upFlow = Long.parseLong(fields[1]);
        long downFlow = Long.parseLong(fields[2]);

        flow.set(upFlow,downFlow);
        context.write(phone,flow);
    }
}
package com.atguigu.flow;

import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

import java.io.IOException;

public class flowReducer extends Reducer<Text,flowBean, Text,flowBean> {

    private flowBean sumFlow = new flowBean();
    @Override
    protected void reduce(Text key, Iterable<flowBean> values, Context context) throws IOException, InterruptedException {

        long sumUpflow = 0;
        long sumDownflow =0;


        for(flowBean value:values){
            sumUpflow += value.getUpFlow();
            sumDownflow += value.getDownFlow();

        }
        sumFlow.set(sumUpflow,sumDownflow);

        context.write(key, sumFlow);

    }
}

 
