一、基本原理

数据源--MapReduce--数据结果

temperature.txt -- JAR -- part-r-00000

二、实验过程

1. 数据源

气象数据的下载地址为 ftp://ftp3.ncdc.noaa.gov/pub/data/noaa/ ,该数据集包含从1900年至今所有年份的气象数据,总大小约为70多GB。数据源格式示例如下:

0323999999038531972122912005+30400-081417SAO  +0005NRB  V02099959000050304849N011200599+00945+00615102435ADDGA1999+030484034GA2999+076204004GD13075+9999999GD24015+9999999GD30995+9999999GD40995+9999999GF108085999999999999999999MA1999999102375EQDN01 00000JPWTH 1QNNE11 1 00409E11 1 00601E11 1 00099E11 1 00099G11 1 00100H11 1 23100H11 1 32250K11 1 00043L11 1 00700M11 1 30230N11 1 00000Q11 1 10243S11 1 00049V11 1 01010X11 1 00000
0383999999038531972122915005+30400-081417SAO  +0005NRB  V0200405N003652200059N009600599+01565+01175102545ADDGA1999+076204004GA2999+999999999GA3999+999999999GA4999+999999999GD12015+9999999GD20005+9999999GD30005+9999999GD40005+9999999GF101015999999999999999999MA1999999102475MW1055EQDN01 08100JPWTH 1QNNE11 1 00201E11 1 00000E11 1 00000E11 1 00000G11U1 99999H11 1 32250H11 1 00999H11 1 00999H11 1 00999K11 1 00053L11 1 00600M11 1 30260N11 1 08100Q11 1 10254S11 1 00060V11 1 00101X11 1 04007
0383999999038531972122918005+30400-081417SAO  +0005NRB  V0200405N003150762049N009600599+01725+01335102535ADDGA1999+030484034GA2999+076204004GA3999+999999999GA4999+999999999GD12015+9999999GD23065+9999999GD30005+9999999GD40005+9999999GF106055999999999999999999MA1999999102475MW1055EQDN01 08100JPWTH 1QNNE11 1 00201E11 1 00407E11 1 00000E11 1 00000G11 1 00250H11 1 23100H11 1 32250H11 1 00999H11 1 00999K11 1 00056L11 1 00600M11 1 30260N11 1 08100Q11 1 10253S11 1 00063V11 1 00806X11 1 04006
0383999999038531972122921005+30400-081417SAO  +0005NRB  V0200305N003652200059N009600599+01725+01445102385ADDGA1999+999999084GA2999+076204004GA3999+999999999GA4999+999999999GD12005+9999999GD20025+9999999GD30005+9999999GD40005+9999999GF102025999999999999999999MA1999999102345MW1055EQDN01 08100JPWTH 1QNNE11 1 00200E11 1 00002E11 1 00000E11 1 00000G11U1 99999H11 1 11999H11 1 32250H11 1 00999H11 1 00999K11 1 00058L11 1 00600M11 1 30220N11 1 08100Q11 1 10238S11 1 00063V11 1 00202X11 1 03007
0383999999038531972123000005+30400-081417SAO  +0005NRB  V0200105N002152200059N009600599+01565+01445102535ADDGA1999+009144084GA2999+076204004GA3999+999999999GA4999+999999999GD12015+9999999GD22015+9999999GD30005+9999999GD40005+9999999GF102025999999999999999999MA1999999102475MW1445EQDN01 07200JPWTH 1QNNE11 1 00201E11 1 00201E11 1 00000E11 1 00000G11U1 99999H11 1 11030H11 1 32250H11 1 00999H11 1 00999K11 1 00058L11 1 00600M11 1 30260N11 1 07200Q11 1 10253S11 1 00060V11 1 00202X11 1 01004
0383999999038531972123003005+30400-081417SAO  +0005NRB  V0200605N001052200059N009600599+01565+01445102515ADDGA1999+076204004GA2999+999999999GA3999+999999999GA4999+999999999GD12015+9999999GD20005+9999999GD30005+9999999GD40005+9999999GF101015999999999999999999MA1999999102445MW1445EQDN01 07200JPWTH 1QNNE11 1 00201E11 1 00000E11 1 00000E11 1 00000G11U1 99999H11 1 32250H11 1 00999H11 1 00999H11 1 00999K11 1 00058L11 1 00600M11 1 30250N11 1 07200Q11 1 10251S11 1 00060V11 1 00101X11 1 06002
0383999999038531972123006005+30400-081417SAO  +0005NRB  V0200905N002152200059N009600599+01725+01505102375ADDGA1999+004574084GA2999+076204004GA3999+999999999GA4999+999999999GD12025+9999999GD22025+9999999GD30005+9999999GD40005+9999999GF104025999999999999999999MA1999999102305MW1445EQDN01 07200JPWTH 1QNNE11 1 00202E11 1 00203E11 1 00000E11 1 00000G11U1 99999H11 1 11015H11 1 32250H11 1 00999H11 1 00999K11 1 00059L11 1 00600M11 1 30210N11 1 07200Q11 1 10237S11 1 00063V11 1 00503X11 1 09004
0329999999038531972123009005+30400-081417SAO  +0005NRB  V02099959000050762049N009600599+01445+01335102275ADDGA1999+036584034GA2999+076204004GD12025+9999999GD24065+9999999GD30995+9999999GD40995+9999999GF108085999999999999999999MA1999999102205MW1445EQDN01 07200JPWTH 1QNNE11 1 00203E11 1 00607E11 1 00099E11 1 00099G11 1 00250H11 1 23120H11 1 32250K11 1 00056L11 1 00600M11 1 30180N11 1 07200Q11 1 10227S11 1 00058V11 1 01010X11 1 00000
0383999999038531972123012005+30400-081417SAO  +0005NRB  V02099959000050762049N008000599+01335+01335102325ADDGA1999+030484034GA2999+076204004GA3999+999999999GA4999+999999999GD12035+9999999GD23035+9999999GD30005+9999999GD40005+9999999GF106065999999999999999999MA1999999102275MW1445EQDN01 07200JPWTH 1QNNE11 1 00204E11 1 00404E11 1 00000E11 1 00000G11 1 00250H11 1 23100H11 1 32250H11 1 00999H11 1 00999K11 1 00056L11 1 00500M11 1 30200N11 1 07200Q11 1 10232S11 1 00056V11 1 00807X11 1 00000
0356999999038531972123015005+30400-081417SAO  +0005NRB  V02099959000050152449N009600599+01785+01725102325ADDGA1999+015244064GA2999+030484034GA3999+076204004GD13065+9999999GD23015+9999999GD34015+9999999GD40995+9999999GF108085999999999999999999MA1999999102275MW1445EQDN01 07200JPWTH 1QNNE11 1 00408E11 1 00401E11 1 00601E11 1 00099G11 1 00050H11 1 15050H11 1 23100H11 1 32250K11 1 00063L11 1 00600M11 1 30200N11 1 07200Q11 1 10232S11 1 00064V11 1 01010X11 1 00000
0329999999038531972123018005+30400-081417SAO  +0005NRB  V0201805N002150152449N009600599+02175+01835102275ADDGA1999+007624084GA2999+015244064GD12015+9999999GD24075+9999999GD30995+9999999GD40995+9999999GF108085999999999999999999MA1999999102205MW1615EQDN01 02000JPWTH 1QNNE11 1 00201E11 1 00609E11 1 00099E11 1 00099G11 1 00050H11 1 11025H11 1 15050K11 1 00065L11 1 00600M11 1 30180N11 1 02000Q11 1 10227S11 1 00071V11 1 01010X11 1 18004
0377999999038531972123021005+30400-081417SAO  +0005NRB  V0200905N002150152449N011200599+02005+01615102005ADDGA1999+015244064GA2999+076204004GA3999+999999999GA4999+999999999GD13055+9999999GD23025+9999999GD30005+9999999GD40005+9999999GF107075999999999999999999MA1999999101965EQDN01 00000JPWTH 1QNNE11 1 00406E11 1 00403E11 1 00000E11 1 00000G11 1 00050H11 1 15050H11 1 32250H11 1 00999H11 1 00999K11 1 00061L11 1 00700M11 1 30110N11 1 00000Q11 1 10200S11 1 00068V11 1 00909X11 1 09004
0377999999038531972123100005+30400-081417SAO  +0005NRB  V0201005N003650762049N011200599+01835+01505102015ADDGA1999+076204004GA2999+999999999GA3999+999999999GA4999+999999999GD13065+9999999GD20005+9999999GD30005+9999999GD40005+9999999GF106045999999999999999999MA1999999101965EQDN01 00000JPWTH 1QNNE11 1 00407E11 1 00000E11 1 00000E11 1 00000G11 1 00250H11 1 32250H11 1 00999H11 1 00999H11 1 00999K11 1 00059L11 1 00700M11 1 30110N11 1 00000Q11 1 10201S11 1 00065V11 1 00705X11 1 10007
0377999999038531972123103005+30400-081417SAO  +0005NRB  V0201505N004152200059N011200599+01835+01615101875ADDGA1999+076204004GA2999+999999999GA3999+999999999GA4999+999999999GD12025+9999999GD20005+9999999GD30005+9999999GD40005+9999999GF102025999999999999999999MA1999999101835EQDN01 00000JPWTH 1QNNE11 1 00203E11 1 00000E11 1 00000E11 1 00000G11U1 99999H11 1 32250H11 1 00999H11 1 00999H11 1 00999K11 1 00061L11 1 00700M11 1 30070N11 1 00000Q11 1 10187S11 1 00065V11 1 00302X11 1 15008
0329999999038531972123106005+30400-081417SAO  +0005NRB  V02099959000050152449N009600599+01675+01565101935ADDGA1999+015244064GA2999+076204004GD13055+9999999GD24035+9999999GD30995+9999999GD40995+9999999GF108085999999999999999999MA1999999101865MW1445EQDN01 07200JPWTH 1QNNE11 1 00406E11 1 00604E11 1 00099E11 1 00099G11 1 00050H11 1 15050H11 1 32250K11 1 00060L11 1 00600M11 1 30080N11 1 07200Q11 1 10193S11 1 00062V11 1 01010X11 1 00000
0323999999038531972123109005+30400-081417SAO  +0005NRB  V0201905N002650304849N011200599+01945+01615101825ADDGA1999+030484034GA2999+076204004GD13055+9999999GD24035+9999999GD30995+9999999GD40995+9999999GF108085999999999999999999MA1999999101765EQDN01 00000JPWTH 1QNNE11 1 00406E11 1 00604E11 1 00099E11 1 00099G11 1 00100H11 1 23100H11 1 32250K11 1 00061L11 1 00700M11 1 30050N11 1 00000Q11 1 10182S11 1 00067V11 1 01010X11 1 19005
0323999999038531972123112005+30400-081417SAO  +0005NRB  V0202005N002650304849N011200599+01835+01615101925ADDGA1999+030484034GA2999+076204004GD13065+9999999GD24025+9999999GD30995+9999999GD40995+9999999GF108085999999999999999999MA1999999101865EQDN01 00000JPWTH 1QNNE11 1 00407E11 1 00603E11 1 00099E11 1 00099G11 1 00100H11 1 23100H11 1 32250K11 1 00061L11 1 00700M11 1 30080N11 1 00000Q11 1 10192S11 1 00065V11 1 01010X11 1 20005
0323999999038531972123115005+30400-081417SAO  +0005NRB  V0201605N007750304849N011200599+02115+01675101905ADDGA1999+030484034GA2999+076204004GD13065+9999999GD24025+9999999GD30995+9999999GD40995+9999999GF108085999999999999999999MA1999999101865EQDN01 00000JPWTH 1QNNE11 1 00407E11 1 00603E11 1 00099E11 1 00099G11 1 00100H11 1 23100H11 1 32250K11 1 00062L11 1 00700M11 1 30080N11 1 00000Q11 1 10190S11 1 00070V11 1 01010X11 1 16015
0350999999038531972123118005+30400-081417SAO  +0005NRB  V0201705N006250365849N011200599+02395+01835101925ADDGA1999+009144084GA2999+036584034GA3999+076204004GD12015+9999999GD23055+9999999GD34025+9999999GD40995+9999999GF108085999999999999999999MA1999999101865EQDN01 00000JPWTH 1QNNE11 1 00201E11 1 00406E11 1 00603E11 1 00099G11 1 00120H11 1 11030H11 1 23120H11 1 32250K11 1 00065L11 1 00700M11 1 30080N11 1 00000Q11 1 10192S11 1 00075V11 1 01010X11 1 17012
0350999999038531972123121005+30400-081417SAO  +0005NRB  V0201405N003150304849N011200599+02115+01785101745ADDGA1999+009144084GA2999+030484034GA3999+076204004GD12015+9999999GD23065+9999999GD34025+9999999GD40995+9999999GF108085999999999999999999MA1999999101695EQDN01 00000JPWTH 1QNNE11 1 00201E11 1 00407E11 1 00602E11 1 00099G11 1 00100H11 1 11030H11 1 23100H11 1 32250K11 1 00064L11 1 00700M11 1 30030N11 1 00000Q11 1 10174S11 1 00070V11 1 01010X11 1 14006

 

下面编写一个MapReduce作业,求每年的最低温度。

2. 编写 Java 处理类

MinTemperature.java 
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.io.IntWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Job;
    import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
    import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

    /**
     * Driver that configures and submits the "Min temperature" MapReduce job.
     *
     * <p>Wires {@link MinTemperatureMapper} and {@link MinTemperatureReducer}
     * together, reads input from the first command-line argument and writes
     * output to the second.
     */
    public class MinTemperature {

        /**
         * Entry point.
         *
         * <p>Exits with status -1 when the argument count is not exactly two,
         * 0 when the job completes successfully, and 1 otherwise.
         *
         * @param args {@code args[0]} is the input path, {@code args[1]} is the output path
         * @throws Exception if job setup or execution fails
         */
        public static void main(String[] args) throws Exception {
            if (args.length != 2) {
                System.err.println("Usage: MinTemperature <input path> <output path>");
                System.exit(-1);
            }

            Job job = new Job();
            job.setJarByClass(MinTemperature.class);
            job.setJobName("Min temperature");

            FileInputFormat.addInputPath(job, new Path(args[0]));
            FileOutputFormat.setOutputPath(job, new Path(args[1]));

            job.setMapperClass(MinTemperatureMapper.class);
            // Taking a minimum is associative and commutative, and the reducer's
            // input and output types are identical, so it can also run as a
            // combiner to shrink the data shuffled to the reducers. The final
            // output is unchanged.
            job.setCombinerClass(MinTemperatureReducer.class);
            job.setReducerClass(MinTemperatureReducer.class);

            job.setOutputKeyClass(Text.class);
            job.setOutputValueClass(IntWritable.class);

            System.exit(job.waitForCompletion(true) ? 0 : 1);
        }
    }

 

Mapper类

    import java.io.IOException;
    import org.apache.hadoop.io.IntWritable;
    import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Mapper;

    /**
     * Mapper that turns each fixed-width weather record into a
     * (year, air temperature) pair.
     *
     * <p>Field positions used (0-based, end-exclusive): year at [15, 19),
     * signed air temperature at [87, 92), quality code at [92, 93).
     * Records that are too short, or whose temperature field is not numeric,
     * are skipped and counted in the "MinTemperature" counter group instead of
     * failing the whole task.
     */
    public class MinTemperatureMapper extends Mapper<LongWritable, Text, Text, IntWritable>{

        /** Temperature value the record format uses to mark a missing reading. */
        private static final int MISSING = 9999;

        /** Shortest line that still contains the quality code at index 92. */
        private static final int MIN_RECORD_LENGTH = 93;

        /** Quality codes for which a reading is emitted. */
        private static final String ACCEPTED_QUALITY_CODES = "01459";

        /** Counter group under which skipped records are reported. */
        private static final String COUNTER_GROUP = "MinTemperature";

        /**
         * Emits (year, temperature) for one input line when the reading is
         * present and its quality code is accepted.
         *
         * @param key     input key supplied by the framework (unused)
         * @param value   one line of the weather data file
         * @param context job context used to emit output and update counters
         * @throws IOException          if writing the output fails
         * @throws InterruptedException if the task is interrupted while writing
         */
        @Override
        public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {

            String line = value.toString();
            // Guard the fixed-offset reads below: a blank or truncated line would
            // otherwise throw StringIndexOutOfBoundsException and fail the task.
            if (line.length() < MIN_RECORD_LENGTH) {
                context.getCounter(COUNTER_GROUP, "SHORT_RECORDS").increment(1);
                return;
            }

            String year = line.substring(15, 19);

            int airTemperature;
            try {
                if (line.charAt(87) == '+') {
                    // Skip the explicit plus sign before parsing.
                    airTemperature = Integer.parseInt(line.substring(88, 92));
                } else {
                    airTemperature = Integer.parseInt(line.substring(87, 92));
                }
            } catch (NumberFormatException ignored) {
                // A corrupt temperature field must not abort the job; count and skip it.
                context.getCounter(COUNTER_GROUP, "UNPARSABLE_TEMPERATURE").increment(1);
                return;
            }

            // Plain character lookup instead of String.matches, which would
            // recompile the regular expression for every record.
            char quality = line.charAt(92);
            if (airTemperature != MISSING && ACCEPTED_QUALITY_CODES.indexOf(quality) >= 0) {
                context.write(new Text(year), new IntWritable(airTemperature));
            }
        }
    }

 

Reducer类

    import java.io.IOException;
    import org.apache.hadoop.io.IntWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Reducer;

    /**
     * Reducer that collapses all temperature readings sharing one key into
     * their minimum.
     */
    public class MinTemperatureReducer extends Reducer<Text, IntWritable, Text, IntWritable> {

        /**
         * Writes the smallest value found in {@code values} under {@code key}.
         *
         * <p>If {@code values} yields no elements, {@link Integer#MAX_VALUE}
         * is written.
         *
         * @param key     the grouping key, passed through to the output unchanged
         * @param values  the readings grouped under {@code key}
         * @param context job context used to emit the result
         * @throws IOException          if writing the output fails
         * @throws InterruptedException if the task is interrupted while writing
         */
        @Override
        public void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
            // Start from the largest int so that any reading can replace it.
            int lowest = Integer.MAX_VALUE;
            for (IntWritable reading : values) {
                int candidate = reading.get();
                if (candidate < lowest) {
                    lowest = candidate;
                }
            }

            IntWritable result = new IntWritable(lowest);
            context.write(key, result);
        }
    }

 

3. 打JAR包

javac -classpath ../hadoop-core-1.1.2.jar *.java
jar cvf ./MinTemperature.jar ./Min*.class

 

4. 创建数据源目录,并将数据文件放入其中

hadoop fs -mkdir -p /class5/in
hadoop fs -copyFromLocal temperature.txt /class5/in
hadoop fs -ls /class5/in

 

5. 运行程序

hadoop jar MinTemperature.jar MinTemperature /class5/in/temperature.txt  /class5/out

 

6. 查看结果

hadoop fs -ls /class5/out
hadoop fs -cat /class5/out/part-r-00000
1971 -461
1972 -267
1973 -390

 

posted on 2018-06-06 14:36  水共禾刀  阅读(323)  评论(0)  编辑  收藏  举报