北京 2019 年空气 AQI 数据分析与可视化

import org.apache.hadoop.io.*;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;

import java.io.IOException;

/**
 * Mapper that emits one (month key, AQI value) pair per well-formed input record.
 *
 * <p>Each input line is split on tab characters. The first seven characters of column 0
 * become the output key (presumably a {@code yyyy-MM} prefix of a date string -- confirm
 * against the input data) and column 2 is parsed as the AQI value.
 *
 * <p>Records that are too short or whose AQI column is not numeric are skipped and counted
 * instead of failing the whole task.
 */
public class Air17Mapper extends Mapper<LongWritable, Text, Text, DoubleWritable> {

    /** Number of leading characters of column 0 used as the grouping key. */
    private static final int MONTH_PREFIX_LENGTH = 7;

    /** Minimum number of tab-separated columns a record must have to be processed. */
    private static final int MIN_COLUMNS = 4;

    /** Counter group under which skipped records are reported. */
    private static final String COUNTER_GROUP = "Air17";

    // Reused across calls to avoid allocating two objects per input record.
    private final Text outKey = new Text();
    private final DoubleWritable outValue = new DoubleWritable();

    /**
     * Parses one tab-separated line and writes (month key, AQI) to the context.
     *
     * @param key     input key supplied by the framework (unused)
     * @param value   one line of input text
     * @param context Hadoop context used to emit output and update counters
     * @throws IOException          if writing to the context fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        String[] fields = value.toString().split("\t");

        // Validate BEFORE touching fields[0]: blank or truncated lines would otherwise
        // throw StringIndexOutOfBoundsException from substring().
        if (fields.length < MIN_COLUMNS || fields[0].length() < MONTH_PREFIX_LENGTH) {
            context.getCounter(COUNTER_GROUP, "SKIPPED_SHORT_RECORD").increment(1);
            return;
        }

        double aqi;
        try {
            aqi = Double.parseDouble(fields[2]);
        } catch (NumberFormatException e) {
            // Header rows or non-numeric cells: skip the record rather than fail the job.
            context.getCounter(COUNTER_GROUP, "SKIPPED_NON_NUMERIC_AQI").increment(1);
            return;
        }

        outKey.set(fields[0].substring(0, MONTH_PREFIX_LENGTH));
        outValue.set(aqi);
        context.write(outKey, outValue);
    }
}
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

import java.io.IOException;
/**
 * Reducer that writes, for each key, the arithmetic mean of its values rounded to one
 * decimal place.
 */
public class Air17Reducer extends Reducer<Text, DoubleWritable, Text, DoubleWritable> {

    /**
     * Averages all values received for {@code key} and emits the mean rounded to one decimal.
     *
     * @param key     grouping key produced by the mapper
     * @param values  all values emitted for this key
     * @param context Hadoop context used to emit the result
     * @throws IOException          if writing to the context fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    @Override
    protected void reduce(Text key, Iterable<DoubleWritable> values, Context context)
            throws IOException, InterruptedException {
        double sum = 0;
        long count = 0;
        for (DoubleWritable value : values) {
            sum += value.get();
            count++;
        }
        double mean = sum / count;

        // Format with Locale.ROOT: under a default locale that uses ',' as the decimal
        // separator the formatted text would not be parseable by Double.parseDouble.
        String oneDecimal = String.format(java.util.Locale.ROOT, "%.1f", mean);
        context.write(key, new DoubleWritable(Double.parseDouble(oneDecimal)));
    }
}
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;

import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import java.io.IOException;

/**
 * Driver that configures and submits the "air17" MapReduce job using
 * {@link Air17Mapper} and {@link Air17Reducer}.
 */
public class Air17Runner {

    /**
     * Configures the job and blocks until it finishes.
     *
     * <p>The process exits with status 0 when the job succeeds, 1 when it fails, and 2 when
     * the required arguments are missing.
     *
     * @param args {@code args[0]} is the input path, {@code args[1]} is the output path
     * @throws IOException            if the job cannot be created or submitted
     * @throws ClassNotFoundException if a configured job class cannot be loaded
     * @throws InterruptedException   if the wait for job completion is interrupted
     */
    public static void main(String[] args)
            throws IOException, ClassNotFoundException, InterruptedException {
        // Fail with a readable message instead of an ArrayIndexOutOfBoundsException.
        if (args.length < 2) {
            System.err.println("Usage: Air17Runner <input path> <output path>");
            System.exit(2);
        }

        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "air17");

        // Input and output locations.
        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        // Classes that make up the job.
        job.setJarByClass(Air17Runner.class);
        job.setMapperClass(Air17Mapper.class);
        job.setReducerClass(Air17Reducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(DoubleWritable.class);

        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt

# Plot the per-key averages written by the MapReduce job as a line chart.

# Select the SimHei font so the Chinese labels below can be rendered.
matplotlib.rcParams['font.family'] = 'SimHei'
matplotlib.rcParams['font.sans-serif'] = ['SimHei']

# The reducer output is read as tab-separated text with no header row.
data = pd.read_csv(r"D:\output17\part-r-00000", sep='\t', header=None)
data.columns = ['日期', '每月AQI均值']
data.head()  # preview for interactive/notebook use; no visible effect in a plain script

plt.figure(figsize=(9, 6))
x = data['日期']
y = data['每月AQI均值']
plt.plot(x, y, label='每月AQI均值')
plt.xlabel('日期')
plt.ylabel("每月AQI均值")
plt.ylim(0, 90)
plt.xticks(rotation=30, fontsize=10)

# Annotate every point with its value. The plt.text call must be indented under the
# loop; without the indentation the script fails with an IndentationError.
for a, b in zip(x, y):
    plt.text(a, b, b, ha='center', va='bottom', fontsize=10)

plt.legend()
plt.title("2019 年北京全年 AQI 指数趋势")
plt.show()
 

 

posted @ 2022-09-07 15:30  aq阿桂  阅读(218)  评论(0)    收藏  举报