MapJoin
1. Suited to scenarios where one table is very small (under 15 MB; in Hive, 25 MB) and the other table is very large.
2. The join is completed entirely on the map side: no reduce phase is needed, there is no shuffle (the grouping and full sort between map and reduce), and data skew is avoided.
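The driver and mapper below implement this pattern: the small table pd.txt is shipped to every map task through the distributed cache and loaded into a HashMap in setup(), and each record of the large table order.txt is then joined against that map in map().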
package com.atguigu.mapJoin;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import java.net.URI;

public class MJdriver {
    public static void main(String[] args) throws Exception {
        Job job = Job.getInstance(new Configuration());
        job.setJarByClass(MJdriver.class);
        job.setMapperClass(MJmapper.class);

        // Map-only job: the join happens entirely in the mapper, so no reducers are needed.
        job.setNumReduceTasks(0);

        // Distribute the small table (pd.txt) to every map task via the distributed cache.
        job.addCacheFile(URI.create("input/pd.txt"));

        // The large table (order.txt) is the normal job input.
        FileInputFormat.setInputPaths(job, new Path("input/order.txt"));
        FileOutputFormat.setOutputPath(job, new Path("output"));

        boolean b = job.waitForCompletion(true);
        System.exit(b ? 0 : 1);
    }
}
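Note that the driver above uses local relative paths for both the cache file and the job input, which only works when the job runs locally. On a real cluster the cached small table has to be readable by every map task, so it would typically be uploaded to HDFS first and referenced with a full URI, for example (hypothetical path): job.addCacheFile(URI.create("hdfs://namenode:8020/cache/pd.txt"));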
package com.atguigu.mapJoin;

import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

import java.io.*;
import java.net.URI;
import java.util.HashMap;
import java.util.Map;

public class MJmapper extends Mapper<LongWritable, Text, Text, NullWritable> {
    // In-memory copy of the small table: pid -> pname.
    private Map<String, String> pMap = new HashMap<>();
    private Text k = new Text();

    @Override
    protected void setup(Context context) throws IOException, InterruptedException {
        // Read the cached small table (pd.txt) once per map task and load it into the map.
        URI[] cacheFiles = context.getCacheFiles();
        String path = cacheFiles[0].toString();
        BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(new FileInputStream(path)));
        String line;
        while (StringUtils.isNotEmpty(line = bufferedReader.readLine())) {
            String[] fields = line.split(",");
            pMap.put(fields[0], fields[1]);
        }
        bufferedReader.close();
    }

    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        // Each order record is joined against the in-memory small table by product id.
        String[] fields = value.toString().split(",");
        String pname = pMap.get(fields[1]);
        if (pname == null) {
            pname = "NULL";
        }
        k.set(fields[0] + "\t" + pname + "\t" + fields[2]);
        context.write(k, NullWritable.get());
    }
}
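The post does not spell out the field layout, but the split(",") calls imply comma-separated records: pd.txt lines of the form pid,pname and order.txt lines of the form orderId,pid,amount. A minimal standalone sketch of the same join logic on hypothetical in-memory data (not part of the original code; class name and values are made up) shows what the map-only job writes out, one tab-separated line per order:

package com.atguigu.mapJoin;

import java.util.HashMap;
import java.util.Map;

public class MapJoinSketch {
    public static void main(String[] args) {
        // Small table, as the mapper's setup() would build it: pid -> pname (hypothetical values).
        Map<String, String> pMap = new HashMap<>();
        pMap.put("01", "xiaomi");
        pMap.put("02", "huawei");

        // Large table records, assumed layout orderId,pid,amount (hypothetical values).
        String[] orders = {"1001,01,2", "1002,02,3", "1003,03,1"};

        for (String order : orders) {
            String[] fields = order.split(",");
            // Same lookup-and-fallback as MJmapper.map(): unknown pids become "NULL".
            String pname = pMap.getOrDefault(fields[1], "NULL");
            System.out.println(fields[0] + "\t" + pname + "\t" + fields[2]);
            // Prints, e.g.: 1001    xiaomi    2
        }
    }
}

Because the real job writes NullWritable values, TextOutputFormat emits only the key text, so each line of the part-m-00000 output has the same orderId<TAB>productName<TAB>amount shape as the println output above.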