MapReduce - WordCount Example (3): Partitioning Keys by the Parity of Their ASCII Codes (Partitioner)
Posted on 2020-04-26 10:17 by MissRong
For the Partitioner, press Ctrl+H on the Partitioner class to open its type hierarchy and find HashPartitioner on the right-hand side; this is the partitioner MapReduce uses by default when no custom one is set.
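For reference, here is a minimal sketch of the logic inside the default HashPartitioner (the class name HashLikePartitioner is made up for illustration; masking with Integer.MAX_VALUE clears the sign bit so the modulo result is never negative):

import org.apache.hadoop.mapreduce.Partitioner;

// a minimal sketch of the default hash-based partitioning logic
public class HashLikePartitioner<K, V> extends Partitioner<K, V> {
    @Override
    public int getPartition(K key, V value, int numReduceTasks) {
        // clear the sign bit, then spread keys evenly across the reducers
        return (key.hashCode() & Integer.MAX_VALUE) % numReduceTasks;
    }
}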
1. The WordCountMapper class
package WordCount;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

import java.io.IOException;

/**
 * <KEYIN, VALUEIN, KEYOUT, VALUEOUT>
 * <LongWritable, Text, Text, IntWritable>
 */
public class WordCountMapper extends Mapper<LongWritable, Text, Text, IntWritable> {

    Text k = new Text();
    IntWritable v = new IntWritable();

    /**
     * Ctrl+O lists the inherited methods to override
     */
    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        // 1. Convert the format: Hadoop type --> Java String
        String line = value.toString();
        // 2. Split the line into words
        String[] words = line.split(" ");
        // 3. Emit each word as <word, 1>
        for (String word : words) {
            k.set(word);
            v.set(1);
            context.write(k, v);
        }
    }
}
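To see what the mapper emits, here is a tiny standalone sketch that replays the split-and-emit step on one line of input, with no Hadoop dependency (the class name WordCountMapSketch is made up for illustration):

// standalone sketch of the mapper's tokenize-and-emit step
public class WordCountMapSketch {
    public static void main(String[] args) {
        String line = "hello world hello";      // one input line
        for (String word : line.split(" ")) {   // same split rule as the mapper
            System.out.println("<" + word + ", 1>");
        }
        // prints: <hello, 1>  <world, 1>  <hello, 1>
    }
}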
2. The partitioner class: WordCountPartitioner
package WordCount;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Partitioner;

public class WordCountPartitioner extends Partitioner<Text, IntWritable> {

    /**
     * Partition by the parity of the first character's ASCII code.
     * @param key the key
     * @param value the value
     * @param numPartitions the number of partitions
     * @return 0 for an even ASCII code, 1 for an odd one
     */
    @Override
    public int getPartition(Text key, IntWritable value, int numPartitions) {
        // 1. Take the first character of the key
        char first = key.toString().charAt(0);
        // 2. A char is an integral type, so its ASCII code can be tested directly.
        //    (Parsing it with Integer.valueOf would throw NumberFormatException
        //    for any key that does not start with a digit.)
        if (first % 2 == 0) {
            return 0;
        } else {
            return 1;
        }
    }
}
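To check the parity rule without running a cluster, here is a small standalone sketch (the class name PartitionSketch is made up for illustration) that shows where a few sample keys land:

// standalone check of the ASCII-parity partitioning rule
public class PartitionSketch {
    public static void main(String[] args) {
        for (String key : new String[]{"hello", "apple", "banana"}) {
            char first = key.charAt(0);
            int partition = (first % 2 == 0) ? 0 : 1;
            System.out.println(key + ": '" + first + "' = ASCII " + (int) first
                    + " -> partition " + partition);
        }
        // hello: 'h' = ASCII 104 -> partition 0
        // apple: 'a' = ASCII 97 -> partition 1
        // banana: 'b' = ASCII 98 -> partition 0
    }
}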
3. The WordCountReducer class
package WordCount;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

import java.io.IOException;

/**
 * <KEYIN, VALUEIN, KEYOUT, VALUEOUT>
 * <Text, IntWritable, Text, IntWritable>
 */
public class WordCountReducer extends Reducer<Text, IntWritable, Text, IntWritable> {

    IntWritable v = new IntWritable();

    @Override
    protected void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
        // 1. Initialize the count
        int count = 0;
        // 2. Sum the occurrences
        for (IntWritable value : values) {
            count += value.get();
        }
        v.set(count);
        // 3. Emit the total count for this word
        context.write(key, v);
    }
}
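The reducer receives each word together with the list of 1s that the shuffle grouped under it. A minimal standalone sketch of the summing step (the class name ReduceSketch is made up for illustration):

import java.util.Arrays;
import java.util.List;

// standalone sketch of the reducer's summing step
public class ReduceSketch {
    public static void main(String[] args) {
        // simulates reduce("hello", [1, 1, 1]) after the shuffle has grouped values by key
        List<Integer> values = Arrays.asList(1, 1, 1);
        int count = 0;
        for (int v : values) {
            count += v;
        }
        System.out.println("<hello, " + count + ">"); // <hello, 3>
    }
}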
4. The driver class: WordCountDriver
package WordCount;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class WordCountDriver {
    public static void main(String[] args) throws Exception {
        // Timing: start time
        long startTime = System.currentTimeMillis();

        // Windows input path and output path (the output folder must not exist yet)
        args = new String[]{"D:/new 1", "D:/HDFS/f"};

        // 1. Get the configuration; one MR program is one Job
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf);

        // 2. Register the classes (loaded via reflection)
        job.setJarByClass(WordCountDriver.class);
        job.setMapperClass(WordCountMapper.class);
        job.setReducerClass(WordCountReducer.class);
        job.setPartitionerClass(WordCountPartitioner.class);
        // Set the number of reduce tasks to match the number of partitions
        job.setNumReduceTasks(2);

        // 3. Map output (= reduce input) K, V types: Text, IntWritable
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);
        // Reduce output K, V types: the final output
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);

        // 4. Specify the input and output directories
        FileInputFormat.setInputPaths(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        // 5. Submit the job; waitForCompletion() includes submit()
        job.waitForCompletion(true);

        // Timing: end time
        long endTime = System.currentTimeMillis();
        System.out.println("Program running time: " + (endTime - startTime) + " ms");
    }
}
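With numReduceTasks set to 2, the output directory will contain two files: part-r-00000 (keys whose first character has an even ASCII code) and part-r-00001 (keys with an odd one). Note also that MapReduce fails with FileAlreadyExistsException if the output directory already exists; here is a minimal sketch of guarding against that on reruns (the class name OutputDirGuard is made up for illustration; in practice this check would sit at the top of the driver's main):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

// sketch: delete the output directory if it already exists, so reruns
// do not fail with FileAlreadyExistsException (assumes the default file system)
public class OutputDirGuard {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        Path out = new Path("D:/HDFS/f"); // same output path as the driver above
        if (fs.exists(out)) {
            fs.delete(out, true); // true = delete recursively
        }
    }
}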