MapJoin

Mapjoin
1.适用于一张表十分小(小于15M,Hive中为25M)、另一张表很大的场景
2.在map端完成join,不需要reduce,也不需要shuffle(分组、全排序),因此不会出现数据倾斜

package com.atguigu.mapJoin;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import java.net.URI;

/**
 * Driver for the map-side join job.
 *
 * <p>Configures a map-only job (zero reduce tasks) that runs {@link MJmapper}
 * and registers one file as a job cache file.
 *
 * <p>Usage: {@code MJdriver [inputPath [outputPath [cacheFileUri]]]}. Every
 * argument is optional; an omitted argument falls back to the corresponding
 * default below, so running without arguments configures the job with
 * {@code input/order.txt}, {@code output} and {@code input/pd.txt}.
 */
public class MJdriver {

    /** Input path used when no first argument is given. */
    private static final String DEFAULT_INPUT_PATH = "input/order.txt";

    /** Output path used when no second argument is given. */
    private static final String DEFAULT_OUTPUT_PATH = "output";

    /** Cache file URI used when no third argument is given. */
    private static final String DEFAULT_CACHE_FILE = "input/pd.txt";

    /**
     * Builds, submits and waits for the job, then exits the JVM with status 0
     * when the job succeeded and 1 otherwise.
     *
     * @param args optional {@code inputPath}, {@code outputPath} and
     *             {@code cacheFileUri}, in that order
     * @throws Exception if job setup or execution fails
     */
    public static void main(String[] args) throws Exception {
        String inputPath = args.length > 0 ? args[0] : DEFAULT_INPUT_PATH;
        String outputPath = args.length > 1 ? args[1] : DEFAULT_OUTPUT_PATH;
        String cacheFile = args.length > 2 ? args[2] : DEFAULT_CACHE_FILE;

        Job job = Job.getInstance(new Configuration());

        job.setJarByClass(MJdriver.class);

        job.setMapperClass(MJmapper.class);
        // The join is done entirely in the mapper, so no reduce phase is configured.
        job.setNumReduceTasks(0);

        // The file registered here is what the mapper reads in its setup step.
        job.addCacheFile(URI.create(cacheFile));

        FileInputFormat.setInputPaths(job, new Path(inputPath));
        FileOutputFormat.setOutputPath(job, new Path(outputPath));

        boolean succeeded = job.waitForCompletion(true);
        System.exit(succeeded ? 0 : 1);
    }
}

 

 

package com.atguigu.mapJoin;

import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import org.mortbay.util.StringUtil;

import java.io.*;
import java.net.URI;
import java.util.HashMap;
import java.util.Map;

/**
 * Mapper that joins each input record against a lookup table loaded from the
 * first job cache file.
 *
 * <p>The cache file is read as comma-separated lines; the first field is used
 * as the lookup key and the second as the looked-up value. Each input record
 * is also split on commas: the second field is looked up in the table and the
 * output key is {@code field0 \t lookedUpValue \t field2}. When the lookup
 * misses, the literal string {@code "NULL"} is emitted in its place.
 */
public class MJmapper extends Mapper<LongWritable,Text, Text, NullWritable> {

    /** Lookup table built in {@link #setup}: first cache-file field to second. */
    private final Map<String,String> pMap = new HashMap<>();

    /** Reused output key, to avoid allocating a new Text per record. */
    private final Text k = new Text();

    /**
     * Loads the first cache file into {@link #pMap}.
     *
     * <p>Blank lines and lines with fewer than two comma-separated fields are
     * skipped; reading continues until end of file.
     *
     * @param context task context providing the cache file URIs
     * @throws IOException if no cache file is registered or it cannot be read
     * @throws InterruptedException declared by the overridden method
     */
    @Override
    protected void setup(Context context) throws IOException, InterruptedException {
        URI[] cacheFiles = context.getCacheFiles();
        if (cacheFiles == null || cacheFiles.length == 0) {
            throw new IOException("No cache file registered for the map join; "
                    + "expected the driver to call job.addCacheFile(...)");
        }

        // FileInputStream takes a plain file name: strip the scheme of file: URIs
        // and keep every other URI exactly as it was written.
        URI cacheUri = cacheFiles[0];
        String path = "file".equalsIgnoreCase(cacheUri.getScheme())
                ? cacheUri.getPath()
                : cacheUri.toString();

        // try-with-resources closes the reader (and the underlying stream) even on failure.
        try (BufferedReader reader = new BufferedReader(new InputStreamReader(
                new FileInputStream(path), java.nio.charset.StandardCharsets.UTF_8))) {
            String line;
            // Read to end of file; a blank line must not cut the table short.
            while ((line = reader.readLine()) != null) {
                if (line.trim().isEmpty()) {
                    continue;
                }
                String[] fields = line.split(",");
                if (fields.length < 2) {
                    // Not enough fields to form a key/value pair.
                    continue;
                }
                pMap.put(fields[0], fields[1]);
            }
        }
    }

    /**
     * Joins one input record with the lookup table and writes the result.
     *
     * <p>Records with fewer than three comma-separated fields cannot be joined;
     * they are skipped and counted under the {@code MapJoin/MalformedRecords}
     * counter instead of failing the task.
     *
     * @param key     input key supplied by the framework (not used)
     * @param value   one comma-separated input record
     * @param context task context used to emit output and update counters
     * @throws IOException if writing the output fails
     * @throws InterruptedException if writing the output is interrupted
     */
    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        String[] fields = value.toString().split(",");
        if (fields.length < 3) {
            context.getCounter("MapJoin", "MalformedRecords").increment(1);
            return;
        }

        String pname = pMap.get(fields[1]);
        if (pname == null) {
            // No matching entry in the lookup table.
            pname = "NULL";
        }

        k.set(fields[0] + "\t" + pname + "\t" + fields[2]);
        context.write(k, NullWritable.get());
    }
}

 

posted on 2020-11-22 11:01  happygril3  阅读(257)  评论(0)    收藏  举报

导航