package gdp;

import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.io.WritableComparator;

// Groups records by province in the first job, so one reduce call
// sees every record for a single province.
public class GDPGroupingComparator extends WritableComparator {

    public GDPGroupingComparator() {
        super(GdpBean.class, true);
    }

    @Override
    public int compare(WritableComparable a, WritableComparable b) {
        GdpBean o1 = (GdpBean) a;
        GdpBean o2 = (GdpBean) b;
        return o1.getProvince().compareTo(o2.getProvince());
    }
}
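/*
 * NOTE: the GdpBean class that all four files depend on is not included in
 * this section. The sketch below is a hypothetical reconstruction inferred
 * from the calls made elsewhere (set(...), getProvince(), getTotalGdp(),
 * setTotalGdp(...)), not the original class: the quarterly field names, the
 * serialization order, and the compareTo ordering are assumptions.
 */
package gdp;

import org.apache.hadoop.io.WritableComparable;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

public class GdpBean implements WritableComparable<GdpBean> {

    private String province = "";
    private int q1, q2, q3, q4;
    private int totalGdp;

    public void set(String province, int q1, int q2, int q3, int q4) {
        this.province = province;
        this.q1 = q1;
        this.q2 = q2;
        this.q3 = q3;
        this.q4 = q4;
        // Assumed: the total starts as the sum of the four quarterly figures.
        this.totalGdp = q1 + q2 + q3 + q4;
    }

    public String getProvince() {
        return province;
    }

    public int getTotalGdp() {
        return totalGdp;
    }

    public void setTotalGdp(int totalGdp) {
        this.totalGdp = totalGdp;
    }

    @Override
    public int compareTo(GdpBean o) {
        // Sort by province in job one so GDPGroupingComparator can group
        // consecutive records; job two overrides this with GdpSortComparator.
        return this.province.compareTo(o.province);
    }

    @Override
    public void write(DataOutput out) throws IOException {
        out.writeUTF(province);
        out.writeInt(q1);
        out.writeInt(q2);
        out.writeInt(q3);
        out.writeInt(q4);
        out.writeInt(totalGdp);
    }

    @Override
    public void readFields(DataInput in) throws IOException {
        province = in.readUTF();
        q1 = in.readInt();
        q2 = in.readInt();
        q3 = in.readInt();
        q4 = in.readInt();
        totalGdp = in.readInt();
    }
}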
package gdp;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;

import java.io.IOException;
import java.util.Iterator;

public class GDPMapReduce {

    public static class GDPMapper extends Mapper<LongWritable, Text, GdpBean, NullWritable> {

        // Reuse one bean instance across map() calls to avoid per-record allocation.
        private GdpBean gdpBean;

        @Override
        protected void setup(Context context) throws IOException, InterruptedException {
            gdpBean = new GdpBean();
        }

        @Override
        protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
            String[] arr = value.toString().split(",");
            // Check that the line has exactly 5 fields: province plus four GDP figures
            if (arr.length == 5) {
                // Check that the last four fields are non-negative integers
                // (an optional leading + is allowed)
                if (arr[1].matches("^\\+?\\d+$") && arr[2].matches("^\\+?\\d+$")
                        && arr[3].matches("^\\+?\\d+$") && arr[4].matches("^\\+?\\d+$")) {
                    gdpBean.set(arr[0], Integer.parseInt(arr[1]), Integer.parseInt(arr[2]),
                            Integer.parseInt(arr[3]), Integer.parseInt(arr[4]));
                    context.write(gdpBean, NullWritable.get());
                }
            }
        }
    }

    public static class GdpReducer extends Reducer<GdpBean, NullWritable, GdpBean, NullWritable> {
        @Override
        protected void reduce(GdpBean key, Iterable<NullWritable> values, Context context) throws IOException, InterruptedException {
            // Thanks to GDPGroupingComparator, one reduce call covers every record
            // for a province. Hadoop refreshes the key's fields as the iterator
            // advances, so reading getTotalGdp() per step accumulates the total.
            Iterator<NullWritable> it = values.iterator();
            int count = 0;
            while (it.hasNext()) {
                it.next();
                count += key.getTotalGdp();
            }
            key.setTotalGdp(count);
            context.write(key, NullWritable.get());
        }
    }

    public static class GdpStepTwoMapper extends Mapper<GdpBean, NullWritable, GdpBean, NullWritable> {
        @Override
        protected void map(GdpBean key, NullWritable value, Context context) throws IOException, InterruptedException {
            // Identity mapper: pass the beans through so the shuffle re-sorts
            // them by total GDP (see GdpSortComparator).
            context.write(key, value);
        }
    }

    public static class GdpStepTwoReducer extends Reducer<GdpBean, NullWritable, Text, NullWritable> {
        @Override
        protected void reduce(GdpBean key, Iterable<NullWritable> values, Context context) throws IOException, InterruptedException {
            // Provinces with equal total GDP compare as equal under GdpSortComparator
            // and therefore arrive in one reduce call; re-reading the key inside the
            // loop emits one output line per province as the iterator advances.
            for (NullWritable value : values) {
                context.write(new Text(key.getProvince() + "\t" + key.getTotalGdp()), NullWritable.get());
            }
        }
    }

    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        Configuration conf = new Configuration();

        // Step one: sum each province's quarterly figures into a per-province total.
        Job job = Job.getInstance(conf);
        job.setJobName("GDPStepOne");
        job.setJarByClass(GDPMapReduce.class);

        job.setMapperClass(GDPMapper.class);
        job.setReducerClass(GdpReducer.class);

        job.setOutputKeyClass(GdpBean.class);
        job.setOutputValueClass(NullWritable.class);

        job.setPartitionerClass(GDPPartitioner.class);
        job.setGroupingComparatorClass(GDPGroupingComparator.class);

        job.setNumReduceTasks(10);

        // Write SequenceFiles so step two can read the beans back without re-parsing text.
        job.setOutputFormatClass(SequenceFileOutputFormat.class);

        FileInputFormat.addInputPath(job, new Path("C:\\ahadoop\\07\\gdp\\input"));
        FileOutputFormat.setOutputPath(job, new Path("C:\\ahadoop\\07\\gdp\\output"));

        // Step two: re-sort the per-province totals in descending order,
        // but only if step one succeeded.
        if (job.waitForCompletion(true)) {
            Job job2 = Job.getInstance(conf);
            job2.setJobName("GDPStepTwo");
            job2.setJarByClass(GDPMapReduce.class);

            job2.setMapperClass(GdpStepTwoMapper.class);
            job2.setReducerClass(GdpStepTwoReducer.class);

            job2.setMapOutputKeyClass(GdpBean.class);
            job2.setMapOutputValueClass(NullWritable.class);

            job2.setOutputKeyClass(Text.class);
            job2.setOutputValueClass(NullWritable.class);

            job2.setInputFormatClass(SequenceFileInputFormat.class);
            job2.setSortComparatorClass(GdpSortComparator.class);

            FileInputFormat.addInputPath(job2, new Path("C:\\ahadoop\\07\\gdp\\output"));
            FileOutputFormat.setOutputPath(job2, new Path("C:\\ahadoop\\07\\gdp\\output1"));

            job2.waitForCompletion(true);
        }
    }
}
package gdp;

import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.Partitioner;

public class GDPPartitioner extends Partitioner<GdpBean, NullWritable> {
    @Override
    public int getPartition(GdpBean gdpBean, NullWritable nullWritable, int numPartitions) {
        // Mask the sign bit so a negative hashCode never yields a negative partition index.
        return (gdpBean.getProvince().hashCode() & Integer.MAX_VALUE) % numPartitions;
    }
}
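/*
 * Why the "& Integer.MAX_VALUE" mask above matters: String.hashCode() can be
 * negative, and a negative partition index makes Hadoop fail the task. The
 * standalone demo below (not part of the job) uses "polygenelubricants", a
 * String whose hashCode is exactly Integer.MIN_VALUE, so even Math.abs()
 * cannot fix it; clearing the sign bit can.
 */
package gdp;

public class PartitionMaskDemo {
    public static void main(String[] args) {
        int numPartitions = 10;
        int hash = "polygenelubricants".hashCode(); // == Integer.MIN_VALUE

        System.out.println(hash % numPartitions);                       // -8: invalid partition index
        System.out.println(Math.abs(hash) % numPartitions);             // -8: abs(MIN_VALUE) overflows back to MIN_VALUE
        System.out.println((hash & Integer.MAX_VALUE) % numPartitions); // 0: sign bit cleared, always in [0, numPartitions)
    }
}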
package gdp;

import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.io.WritableComparator;

// Orders beans by total GDP, descending, for the second job.
public class GdpSortComparator extends WritableComparator {

    public GdpSortComparator() {
        super(GdpBean.class, true);
    }

    @Override
    public int compare(WritableComparable a, WritableComparable b) {
        GdpBean o1 = (GdpBean) a;
        GdpBean o2 = (GdpBean) b;
        // Integer.compare avoids the overflow that plain subtraction risks
        // when the two totals are far apart.
        return Integer.compare(o2.getTotalGdp(), o1.getTotalGdp());
    }
}