Requirement: given the order data below, find the transaction with the largest amount in each order. Sample log contents:

Order_0000001,Pdt_01,222.8
Order_0000001,Pdt_05,25.8
Order_0000002,Pdt_03,522.8
Order_0000001,Pdt_04,122.4
Order_0000003,Pdt_01,222.8
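
For this sample, the expected result is the maximum transaction of each order, i.e. one order-id/amount pair per order (the job below formats each pair via OrderBean.toString(); shown here simplified):

Order_0000001,222.8
Order_0000002,522.8
Order_0000003,222.8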

Scoring rubric:

Upload the data above to HDFS (2 points) and view it there (2 points) — see the sketch after this list.

Write the map-side code that converts each record into key/value form and passes it on to reduce (4 points).

Write the reduce-side program that aggregates the data (4 points; if no reduce stage is needed, simply set the number of reduce tasks to 0).

Write the driver class that wires up the MapReduce Job and test the finished program locally (4 points).

Package the program and test it on Linux (4 points).
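
For the upload-and-view item, the file can be pushed to HDFS with hdfs dfs -put and inspected with hdfs dfs -cat, or programmatically. Below is a minimal sketch using the Hadoop FileSystem API; the HDFS target path /week03/input/order.txt and the cluster address in fs.defaultFS are assumptions, not part of the original assignment:

package com.order;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;

// Hypothetical helper covering the "upload and view" rubric item.
public class HdfsUpload {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // Assumption: fs.defaultFS points at the cluster, e.g. hdfs://namenode:9000.
        FileSystem fs = FileSystem.get(conf);
        Path local = new Path("C:\\Users\\Dell\\Desktop\\week03\\input\\order.txt");
        Path remote = new Path("/week03/input/order.txt"); // assumed HDFS target
        fs.copyFromLocalFile(local, remote);
        // Read the file back to verify the upload (the equivalent of hdfs dfs -cat).
        IOUtils.copyBytes(fs.open(remote), System.out, 4096, false);
        fs.close();
    }
}

The MapReduce implementation itself follows, starting with the composite key class.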

package com.order;

import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.WritableComparable;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

// Composite key: order id plus transaction amount. Sorting this bean by
// itemid ascending and amount descending pushes each order's largest
// transaction to the front of its group.
public class OrderBean implements WritableComparable<OrderBean> {
    private Text itemid;
    private DoubleWritable amount;

    public OrderBean() {
        // no-arg constructor required by Hadoop for deserialization
    }

    public OrderBean(Text itemid, DoubleWritable amount) {
        setItemid(itemid);
        setAmount(amount);
    }

    public void setItemid(Text itemid) {
        this.itemid = itemid;
    }

    public void setAmount(DoubleWritable amount) {
        this.amount = amount;
    }

    public DoubleWritable getAmount() {
        return amount;
    }

    public Text getItemid() {
        return itemid;
    }

    @Override
    public int compareTo(OrderBean o) {
        int cmp = this.itemid.compareTo(o.getItemid());
        if (cmp == 0) {
            // same order id: negate the comparison so larger amounts sort first
            cmp = -this.amount.compareTo(o.getAmount());
        }
        return cmp;
    }

    @Override
    public void write(DataOutput out) throws IOException {
        out.writeUTF(itemid.toString());
        out.writeDouble(amount.get());
    }

    @Override
    public void readFields(DataInput in) throws IOException {
        this.itemid = new Text(in.readUTF());
        this.amount = new DoubleWritable(in.readDouble());
    }

    @Override
    public String toString() {
        return "OrderBean{" +
                "itemid=" + itemid +
                ", amount=" + amount +
                '}';
    }
}
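
A quick standalone check of the ordering (hypothetical, not part of the job): two beans with the same itemid compare by amount in descending order, so the largest transaction sorts first.

package com.order;

import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.Text;

// Hypothetical sanity check of OrderBean's sort order.
public class OrderBeanSortCheck {
    public static void main(String[] args) {
        OrderBean small = new OrderBean(new Text("Order_0000001"), new DoubleWritable(25.8));
        OrderBean large = new OrderBean(new Text("Order_0000001"), new DoubleWritable(222.8));
        // Same itemid, so the negated amount comparison decides:
        // the larger amount compares as "smaller", i.e. it sorts first.
        System.out.println(large.compareTo(small) < 0); // true
    }
}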

 

package com.order;

import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.Partitioner;

public class ItemIdPartitioner extends Partitioner<OrderBean, NullWritable> {
    @Override
    public int getPartition(OrderBean orderBean, NullWritable nullWritable, int numReduceTask) {
        // Order beans with the same id go to the same partition, and the
        // number of partitions produced matches the user-configured number
        // of reduce tasks.
        return (orderBean.getItemid().hashCode() & Integer.MAX_VALUE) % numReduceTask;
    }
}
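
The & Integer.MAX_VALUE mask clears the sign bit: String.hashCode() can be negative, and a negative remainder would be an invalid partition number. A small illustration with a hypothetical hash value:

// Hypothetical illustration of why the sign bit is masked before the modulo.
public class PartitionMaskDemo {
    public static void main(String[] args) {
        int h = -7;                                      // hashCode() may be negative
        System.out.println(h % 3);                       // -1: not a valid partition number
        System.out.println((h & Integer.MAX_VALUE) % 3); // 1: always in [0, numReduceTask)
    }
}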
package com.order;

import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.io.WritableComparator;

// Groups reduce input by order id only, ignoring the amount, so all beans
// belonging to one order are handed to a single reduce() call.
public class ItemidGroupingComparator extends WritableComparator {

    protected ItemidGroupingComparator() {
        // true: instantiate OrderBean keys so compare() receives real objects
        super(OrderBean.class, true);
    }

    @Override
    public int compare(WritableComparable a, WritableComparable b) {
        OrderBean abean = (OrderBean) a;
        OrderBean bbean = (OrderBean) b;
        return abean.getItemid().compareTo(bbean.getItemid());
    }
}

package com.order;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.StringUtils;
import org.apache.log4j.BasicConfigurator;

import java.io.IOException;

public class OrderMapReduce {

    static class OrderMapper extends Mapper<LongWritable, Text, OrderBean, NullWritable> {
        OrderBean bean = new OrderBean();

        @Override
        protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
            // Each line looks like: Order_0000001,Pdt_01,222.8
            String line = value.toString();
            String[] fields = StringUtils.split(line, ',');
            bean.setItemid(new Text(fields[0]));
            bean.setAmount(new DoubleWritable(Double.parseDouble(fields[2])));
            context.write(bean, NullWritable.get());
        }
    }

    static class OrderReduce extends Reducer<OrderBean, NullWritable, OrderBean, NullWritable> {
        // By the time records reach reduce, all beans with the same order id
        // are treated as one group, with the largest amount first, so writing
        // only the group key emits each order's maximum transaction.

        @Override
        protected void reduce(OrderBean key, Iterable<NullWritable> values, Context context) throws IOException, InterruptedException {
            context.write(key, NullWritable.get());
        }
    }

    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        BasicConfigurator.configure();

        Configuration configuration = new Configuration();
        Job job = Job.getInstance(configuration);

        job.setJarByClass(OrderMapReduce.class);

        job.setMapperClass(OrderMapper.class);
        job.setReducerClass(OrderReduce.class);

        job.setOutputKeyClass(OrderBean.class);
        job.setOutputValueClass(NullWritable.class);

        // Local paths for the local test; the output directory must not already exist.
        FileInputFormat.setInputPaths(job, new Path("C:\\Users\\Dell\\Desktop\\week03\\input\\order.txt"));
        FileOutputFormat.setOutputPath(job, new Path("C:\\Users\\Dell\\Desktop\\week03\\output"));

        job.setGroupingComparatorClass(ItemidGroupingComparator.class);
        job.setPartitionerClass(ItemIdPartitioner.class);
        job.setNumReduceTasks(2);

        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}

 

To test on Linux, build the project into a jar and submit it with the hadoop jar command; the hard-coded Windows paths above must first be replaced with HDFS paths.
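
A hedged sketch of a cluster-friendly driver with the same job wiring, but taking the input and output paths from the command line. The class name OrderDriver, the jar name order.jar, and the HDFS paths are assumptions; it would be run as: hadoop jar order.jar com.order.OrderDriver /week03/input /week03/output

package com.order;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

// Hypothetical cluster driver: identical wiring to OrderMapReduce.main,
// but paths come from args instead of being hard-coded.
public class OrderDriver extends Configured implements Tool {

    @Override
    public int run(String[] args) throws Exception {
        Job job = Job.getInstance(getConf(), "order-max-transaction");
        job.setJarByClass(OrderDriver.class);

        job.setMapperClass(OrderMapReduce.OrderMapper.class);
        job.setReducerClass(OrderMapReduce.OrderReduce.class);

        job.setOutputKeyClass(OrderBean.class);
        job.setOutputValueClass(NullWritable.class);

        job.setPartitionerClass(ItemIdPartitioner.class);
        job.setGroupingComparatorClass(ItemidGroupingComparator.class);
        job.setNumReduceTasks(2);

        FileInputFormat.setInputPaths(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        return job.waitForCompletion(true) ? 0 : 1;
    }

    public static void main(String[] args) throws Exception {
        System.exit(ToolRunner.run(new Configuration(), new OrderDriver(), args));
    }
}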