Requirement: given the order data below, find the transaction with the largest amount within each order. Sample log content:
Order_0000001,Pdt_01,222.8
Order_0000001,Pdt_05,25.8
Order_0000002,Pdt_03,522.8
Order_0000001,Pdt_04,122.4
Order_0000003,Pdt_01,222.8
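For this sample, the job should emit one record per order, keeping only that order's largest transaction: 222.8 for Order_0000001 (the Pdt_01 line), 522.8 for Order_0000002 (Pdt_03), and 222.8 for Order_0000003 (Pdt_01).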
Scoring rules:
Upload the data above to HDFS (2 points) and view it there (2 points); a sketch of this step is given right after this list.
Write the map-side code that converts each record into a key/value pair and passes it to the reduce side (4 points).
Write the reduce-side program that aggregates the data (4 points -- if no reduce side is needed, simply set the number of reduce tasks to 0).
Write the Job class that wires the map and reduce sides together, and test the finished MapReduce program locally (4 points).
Package the program and test it on Linux (4 points).
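For the HDFS step, the usual route is the hdfs dfs -put and hdfs dfs -cat commands. As a minimal Java-API alternative, the sketch below uploads order.txt and streams it back to stdout; the class name and the local/HDFS paths are assumptions for illustration only. The MapReduce implementation itself consists of the four classes that follow.

// UploadOrderData.java -- minimal sketch (assumed paths) for uploading and viewing the data on HDFS.
package com.order;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;

public class UploadOrderData {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // Assumes fs.defaultFS is configured (e.g. via core-site.xml on the classpath).
        FileSystem fs = FileSystem.get(conf);
        // Hypothetical paths -- adjust to the actual local file and cluster layout.
        fs.copyFromLocalFile(new Path("order.txt"), new Path("/week03/input/order.txt"));
        // "View" the uploaded file by streaming it to stdout.
        IOUtils.copyBytes(fs.open(new Path("/week03/input/order.txt")), System.out, 4096, false);
        fs.close();
    }
}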
// OrderBean.java -- composite key carrying the order id and the transaction amount.
package com.order;

import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.WritableComparable;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

public class OrderBean implements WritableComparable<OrderBean> {

    // Despite the name, itemid holds the order id (the first field of each log line).
    private Text itemid;
    private DoubleWritable amount;

    public OrderBean() {
    }

    public OrderBean(Text itemid, DoubleWritable amount) {
        setItemid(itemid);
        setAmount(amount);
    }

    public void setItemid(Text itemid) {
        this.itemid = itemid;
    }

    public void setAmount(DoubleWritable amount) {
        this.amount = amount;
    }

    public DoubleWritable getAmount() {
        return amount;
    }

    public Text getItemid() {
        return itemid;
    }

    // Sort ascending by order id; within the same order, descending by amount,
    // so the largest transaction of each order comes first.
    @Override
    public int compareTo(OrderBean o) {
        int cmp = this.itemid.compareTo(o.getItemid());
        if (cmp == 0) {
            cmp = -this.amount.compareTo(o.getAmount());
        }
        return cmp;
    }

    @Override
    public void write(DataOutput out) throws IOException {
        out.writeUTF(itemid.toString());
        out.writeDouble(amount.get());
    }

    @Override
    public void readFields(DataInput in) throws IOException {
        String readUTF = in.readUTF();
        double readDouble = in.readDouble();
        this.itemid = new Text(readUTF);
        this.amount = new DoubleWritable(readDouble);
    }

    @Override
    public String toString() {
        return "OrderBean{" +
                "itemid=" + itemid +
                ", amount=" + amount +
                '}';
    }
}
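As a quick local sanity check (illustrative only, not part of the graded code), sorting a handful of beans shows the ordering compareTo produces: ascending by order id, and within one order the largest amount first. The class name below is a hypothetical helper.

// OrderBeanSortDemo.java -- hypothetical helper illustrating the OrderBean sort order.
package com.order;

import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.Text;

import java.util.Arrays;

public class OrderBeanSortDemo {
    public static void main(String[] args) {
        OrderBean[] beans = {
                new OrderBean(new Text("Order_0000001"), new DoubleWritable(25.8)),
                new OrderBean(new Text("Order_0000002"), new DoubleWritable(522.8)),
                new OrderBean(new Text("Order_0000001"), new DoubleWritable(222.8)),
        };
        Arrays.sort(beans);
        // Prints Order_0000001/222.8, then Order_0000001/25.8, then Order_0000002/522.8.
        for (OrderBean bean : beans) {
            System.out.println(bean);
        }
    }
}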
// ItemIdPartitioner.java -- partitions map output by order id.
package com.order;

import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.Partitioner;

public class ItemIdPartitioner extends Partitioner<OrderBean, NullWritable> {

    @Override
    public int getPartition(OrderBean orderBean, NullWritable nullWritable, int numReduceTask) {
        // OrderBeans with the same order id are sent to the same partition,
        // and the number of partitions matches the number of reduce tasks configured for the job.
        return (orderBean.getItemid().hashCode() & Integer.MAX_VALUE) % numReduceTask;
    }
}
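A small check (hypothetical helper, not required by the assignment) that two beans from the same order always land in the same partition when two reducers are configured:

// PartitionerDemo.java -- hypothetical helper verifying the partitioner's behaviour.
package com.order;

import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;

public class PartitionerDemo {
    public static void main(String[] args) {
        ItemIdPartitioner partitioner = new ItemIdPartitioner();
        OrderBean a = new OrderBean(new Text("Order_0000001"), new DoubleWritable(222.8));
        OrderBean b = new OrderBean(new Text("Order_0000001"), new DoubleWritable(25.8));
        // Same order id -> same partition, regardless of the amount. Prints "true".
        System.out.println(partitioner.getPartition(a, NullWritable.get(), 2)
                == partitioner.getPartition(b, NullWritable.get(), 2));
    }
}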
// ItemidGroupingComparator.java -- grouping comparator used on the reduce side.
package com.order;

import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.io.WritableComparator;

public class ItemidGroupingComparator extends WritableComparator {

    protected ItemidGroupingComparator() {
        super(OrderBean.class, true);
    }

    // Compare only by order id, so all beans of one order form a single reduce group
    // even though the sort order also takes the amount into account.
    @Override
    public int compare(WritableComparable a, WritableComparable b) {
        OrderBean abean = (OrderBean) a;
        OrderBean bbean = (OrderBean) b;
        return abean.getItemid().compareTo(bbean.getItemid());
    }
}
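To see why the grouping comparator matters: the sort comparator distinguishes beans of the same order by amount, but the grouping comparator reports them as equal, so reduce receives all of an order's beans in one call. A quick illustrative check (hypothetical helper, placed in the same package so the protected constructor is accessible):

// GroupingDemo.java -- hypothetical helper showing that beans of one order compare as equal for grouping.
package com.order;

import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.Text;

public class GroupingDemo {
    public static void main(String[] args) {
        ItemidGroupingComparator grouping = new ItemidGroupingComparator();
        OrderBean a = new OrderBean(new Text("Order_0000001"), new DoubleWritable(222.8));
        OrderBean b = new OrderBean(new Text("Order_0000001"), new DoubleWritable(25.8));
        // Prints 0: "same group" for the reduce side, even though compareTo would not return 0.
        System.out.println(grouping.compare(a, b));
    }
}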
// OrderMapReduce.java -- mapper, reducer and job driver.
package com.order;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.StringUtils;
import org.apache.log4j.BasicConfigurator;

import java.io.IOException;

public class OrderMapReduce {

    static class OrderMapper extends Mapper<LongWritable, Text, OrderBean, NullWritable> {
        OrderBean bean = new OrderBean();

        @Override
        protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
            String line = value.toString();
            String[] fields = StringUtils.split(line, ',');
            // fields[0] is the order id, fields[2] is the transaction amount.
            bean.setItemid(new Text(fields[0]));
            bean.setAmount(new DoubleWritable(Double.parseDouble(fields[2])));
            context.write(bean, NullWritable.get());
        }
    }

    static class OrderReduce extends Reducer<OrderBean, NullWritable, OrderBean, NullWritable> {
        // By the time records reach reduce, all beans with the same order id have been grouped
        // into one call and the bean with the largest amount is sorted first, so writing only
        // the key yields the largest transaction per order.

        @Override
        protected void reduce(OrderBean key, Iterable<NullWritable> values, Context context) throws IOException, InterruptedException {
            context.write(key, NullWritable.get());
        }
    }

    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        BasicConfigurator.configure();

        Configuration configuration = new Configuration();
        Job job = Job.getInstance(configuration);

        job.setJarByClass(OrderMapReduce.class);

        job.setMapperClass(OrderMapper.class);
        job.setReducerClass(OrderReduce.class);

        job.setOutputKeyClass(OrderBean.class);
        job.setOutputValueClass(NullWritable.class);

        FileInputFormat.setInputPaths(job, new Path("C:\\Users\\Dell\\Desktop\\week03\\input\\order.txt"));
        FileOutputFormat.setOutputPath(job, new Path("C:\\Users\\Dell\\Desktop\\week03\\output"));

        job.setGroupingComparatorClass(ItemidGroupingComparator.class);

        job.setPartitionerClass(ItemIdPartitioner.class);
        job.setNumReduceTasks(2);
        job.waitForCompletion(true);
    }
}
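For the packaging and Linux test, the hardcoded Windows paths above only suit the local run. A hedged variant (not in the original code) is to read the input and output locations from the program arguments by replacing the two path lines in main() as shown below, so the packaged jar can point at HDFS directories:

// In main(), take the locations from the command line instead of hardcoding Windows paths:
FileInputFormat.setInputPaths(job, new Path(args[0]));   // e.g. an HDFS input directory
FileOutputFormat.setOutputPath(job, new Path(args[1]));  // must not exist before the run

With that change, the packaged jar can be submitted on Linux with the hadoop jar command, passing the input and output paths as the two program arguments.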