package gdp;

import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.io.WritableComparator;

// Groups records by province in the first job, so one reduce call
// sees every record for a single province.
public class GDPGroupingComparator extends WritableComparator {

    public GDPGroupingComparator() {
        super(GdpBean.class, true);
    }

    @Override
    public int compare(WritableComparable a, WritableComparable b) {
        GdpBean o1 = (GdpBean) a;
        GdpBean o2 = (GdpBean) b;
        return o1.getProvince().compareTo(o2.getProvince());
    }
}
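/*
 * NOTE: the GdpBean class that all four files depend on is not included in
 * this section. The sketch below is a hypothetical reconstruction inferred
 * from the calls made elsewhere (set(...), getProvince(), getTotalGdp(),
 * setTotalGdp(...)), not the original class: the quarterly field names, the
 * serialization order, and the compareTo ordering are assumptions.
 */
package gdp;

import org.apache.hadoop.io.WritableComparable;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

public class GdpBean implements WritableComparable<GdpBean> {

    private String province = "";
    private int q1, q2, q3, q4;
    private int totalGdp;

    public void set(String province, int q1, int q2, int q3, int q4) {
        this.province = province;
        this.q1 = q1;
        this.q2 = q2;
        this.q3 = q3;
        this.q4 = q4;
        // Assumed: the total starts as the sum of the four quarterly figures.
        this.totalGdp = q1 + q2 + q3 + q4;
    }

    public String getProvince() {
        return province;
    }

    public int getTotalGdp() {
        return totalGdp;
    }

    public void setTotalGdp(int totalGdp) {
        this.totalGdp = totalGdp;
    }

    @Override
    public int compareTo(GdpBean o) {
        // Sort by province in job one so GDPGroupingComparator can group
        // consecutive records; job two overrides this with GdpSortComparator.
        return this.province.compareTo(o.province);
    }

    @Override
    public void write(DataOutput out) throws IOException {
        out.writeUTF(province);
        out.writeInt(q1);
        out.writeInt(q2);
        out.writeInt(q3);
        out.writeInt(q4);
        out.writeInt(totalGdp);
    }

    @Override
    public void readFields(DataInput in) throws IOException {
        province = in.readUTF();
        q1 = in.readInt();
        q2 = in.readInt();
        q3 = in.readInt();
        q4 = in.readInt();
        totalGdp = in.readInt();
    }
}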
package gdp;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;

import java.io.IOException;
import java.util.Iterator;

public class GDPMapReduce {

    public static class GDPMapper extends Mapper<LongWritable, Text, GdpBean, NullWritable> {

        // Reuse one bean instance across map() calls to avoid per-record allocation.
        private GdpBean gdpBean;

        @Override
        protected void setup(Context context) throws IOException, InterruptedException {
            gdpBean = new GdpBean();
        }

        @Override
        protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
            String[] arr = value.toString().split(",");
            // Check that the line has exactly 5 fields: province plus four GDP figures
            if (arr.length == 5) {
                // Check that the last four fields are non-negative integers
                // (an optional leading + is allowed)
                if (arr[1].matches("^\\+?\\d+$") && arr[2].matches("^\\+?\\d+$")
                        && arr[3].matches("^\\+?\\d+$") && arr[4].matches("^\\+?\\d+$")) {
                    gdpBean.set(arr[0], Integer.parseInt(arr[1]), Integer.parseInt(arr[2]),
                            Integer.parseInt(arr[3]), Integer.parseInt(arr[4]));
                    context.write(gdpBean, NullWritable.get());
                }
            }
        }
    }

    public static class GdpReducer extends Reducer<GdpBean, NullWritable, GdpBean, NullWritable> {
        @Override
        protected void reduce(GdpBean key, Iterable<NullWritable> values, Context context) throws IOException, InterruptedException {
            // Thanks to GDPGroupingComparator, one reduce call covers every record
            // for a province. Hadoop refreshes the key's fields as the iterator
            // advances, so reading getTotalGdp() per step accumulates the total.
            Iterator<NullWritable> it = values.iterator();
            int count = 0;
            while (it.hasNext()) {
                it.next();
                count += key.getTotalGdp();
            }
            key.setTotalGdp(count);
            context.write(key, NullWritable.get());
        }
    }

    public static class GdpStepTwoMapper extends Mapper<GdpBean, NullWritable, GdpBean, NullWritable> {
        @Override
        protected void map(GdpBean key, NullWritable value, Context context) throws IOException, InterruptedException {
            // Identity mapper: pass the beans through so the shuffle re-sorts
            // them by total GDP (see GdpSortComparator).
            context.write(key, value);
        }
    }

    public static class GdpStepTwoReducer extends Reducer<GdpBean, NullWritable, Text, NullWritable> {
        @Override
        protected void reduce(GdpBean key, Iterable<NullWritable> values, Context context) throws IOException, InterruptedException {
            // Provinces with equal total GDP compare as equal under GdpSortComparator
            // and therefore arrive in one reduce call; re-reading the key inside the
            // loop emits one output line per province as the iterator advances.
            for (NullWritable value : values) {
                context.write(new Text(key.getProvince() + "\t" + key.getTotalGdp()), NullWritable.get());
            }
        }
    }

    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        Configuration conf = new Configuration();

        // Step one: sum each province's quarterly figures into a per-province total.
        Job job = Job.getInstance(conf);
        job.setJobName("GDPStepOne");
        job.setJarByClass(GDPMapReduce.class);

        job.setMapperClass(GDPMapper.class);
        job.setReducerClass(GdpReducer.class);

        job.setOutputKeyClass(GdpBean.class);
        job.setOutputValueClass(NullWritable.class);

        job.setPartitionerClass(GDPPartitioner.class);
        job.setGroupingComparatorClass(GDPGroupingComparator.class);

        job.setNumReduceTasks(10);

        // Write SequenceFiles so step two can read the beans back without re-parsing text.
        job.setOutputFormatClass(SequenceFileOutputFormat.class);

        FileInputFormat.addInputPath(job, new Path("C:\\ahadoop\\07\\gdp\\input"));
        FileOutputFormat.setOutputPath(job, new Path("C:\\ahadoop\\07\\gdp\\output"));

        // Step two: re-sort the per-province totals in descending order,
        // but only if step one succeeded.
        if (job.waitForCompletion(true)) {
            Job job2 = Job.getInstance(conf);
            job2.setJobName("GDPStepTwo");
            job2.setJarByClass(GDPMapReduce.class);

            job2.setMapperClass(GdpStepTwoMapper.class);
            job2.setReducerClass(GdpStepTwoReducer.class);

            job2.setMapOutputKeyClass(GdpBean.class);
            job2.setMapOutputValueClass(NullWritable.class);

            job2.setOutputKeyClass(Text.class);
            job2.setOutputValueClass(NullWritable.class);

            job2.setInputFormatClass(SequenceFileInputFormat.class);
            job2.setSortComparatorClass(GdpSortComparator.class);

            FileInputFormat.addInputPath(job2, new Path("C:\\ahadoop\\07\\gdp\\output"));
            FileOutputFormat.setOutputPath(job2, new Path("C:\\ahadoop\\07\\gdp\\output1"));

            job2.waitForCompletion(true);
        }
    }
}
package gdp;

import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.Partitioner;

public class GDPPartitioner extends Partitioner<GdpBean, NullWritable> {
    @Override
    public int getPartition(GdpBean gdpBean, NullWritable nullWritable, int numPartitions) {
        // Mask the sign bit so a negative hashCode never yields a negative partition index.
        return (gdpBean.getProvince().hashCode() & Integer.MAX_VALUE) % numPartitions;
    }
}
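/*
 * Why the "& Integer.MAX_VALUE" mask above matters: String.hashCode() can be
 * negative, and a negative partition index makes Hadoop fail the task. The
 * standalone demo below (not part of the job) uses "polygenelubricants", a
 * String whose hashCode is exactly Integer.MIN_VALUE, so even Math.abs()
 * cannot fix it; clearing the sign bit can.
 */
package gdp;

public class PartitionMaskDemo {
    public static void main(String[] args) {
        int numPartitions = 10;
        int hash = "polygenelubricants".hashCode(); // == Integer.MIN_VALUE

        System.out.println(hash % numPartitions);                       // -8: invalid partition index
        System.out.println(Math.abs(hash) % numPartitions);             // -8: abs(MIN_VALUE) overflows back to MIN_VALUE
        System.out.println((hash & Integer.MAX_VALUE) % numPartitions); // 0: sign bit cleared, always in [0, numPartitions)
    }
}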
package gdp;

import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.io.WritableComparator;

// Orders beans by total GDP, descending, for the second job.
public class GdpSortComparator extends WritableComparator {

    public GdpSortComparator() {
        super(GdpBean.class, true);
    }

    @Override
    public int compare(WritableComparable a, WritableComparable b) {
        GdpBean o1 = (GdpBean) a;
        GdpBean o2 = (GdpBean) b;
        // Integer.compare avoids the overflow that plain subtraction risks
        // when the two totals are far apart.
        return Integer.compare(o2.getTotalGdp(), o1.getTotalGdp());
    }
}