中国 GDP(2015-2019 年)数据分析与可视化

import org.apache.hadoop.io.*;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;
import org.apache.hadoop.mapreduce.Mapper;

import java.io.IOException;


/**
 * Mapper that extracts China's GDP from tab-separated country records.
 *
 * <p>Each input line is split on tabs. Rows with at least three columns whose
 * first column contains {@code "中国"} are emitted as
 * {@code (year, GDP in trillions)}, where the year is the first four characters
 * of the input file's name and the GDP is the exact figure found in parentheses
 * in the third column, e.g. {@code <label> (20,580,223,000,000)}.
 *
 * <p>Rows whose GDP column cannot be parsed are skipped and counted under the
 * {@code Gdp5Mapper / MALFORMED_GDP} counter instead of failing the task.
 */
public class Gdp5Mapper extends Mapper<LongWritable,Text,Text, DoubleWritable> {

    /** Substring a country name must contain for the row to be emitted. */
    private static final String TARGET_COUNTRY = "中国";

    /** Divisor that converts the raw GDP figure into trillions. */
    private static final double TRILLION = 1_000_000_000_000.0;

    /** Number of leading characters of the file name that form the year key. */
    private static final int YEAR_LENGTH = 4;

    /** Counter group / name used to report rows with an unparseable GDP column. */
    private static final String COUNTER_GROUP = "Gdp5Mapper";
    private static final String MALFORMED_GDP_COUNTER = "MALFORMED_GDP";

    /**
     * Emits {@code (year, GDP in trillions)} for a matching row; ignores all other rows.
     *
     * @param key     position of the line in the input (unused)
     * @param value   one tab-separated record
     * @param context task context used to read the input split and write output
     * @throws IOException          if the input file name is shorter than four
     *                              characters, or if writing the output fails
     * @throws InterruptedException if the write is interrupted
     */
    @Override
    protected void map(LongWritable key,Text value,Context context) throws IOException,InterruptedException{
        String[] fields = value.toString().split("\\t");
        // Note: contains() matches every country name that includes the substring.
        if (fields.length <= 2 || !fields[0].contains(TARGET_COUNTRY)) {
            return;
        }

        final double gdpInTrillions;
        try {
            gdpInTrillions = parseGdpInTrillions(fields[2]);
        } catch (IllegalArgumentException e) {
            // One bad row should not abort the whole task; record it and move on.
            context.getCounter(COUNTER_GROUP, MALFORMED_GDP_COUNTER).increment(1);
            return;
        }

        // The year is taken from the file name, so it is only needed for rows we emit.
        FileSplit inputSplit = (FileSplit) context.getInputSplit();
        String fileName = inputSplit.getPath().getName();
        if (fileName.length() < YEAR_LENGTH) {
            throw new IOException("Input file name '" + fileName
                    + "' is too short to contain a " + YEAR_LENGTH + "-character year prefix");
        }
        String year = fileName.substring(0, YEAR_LENGTH);

        context.write(new Text(year), new DoubleWritable(gdpInTrillions));
    }

    /**
     * Parses the exact GDP figure out of the third column and scales it to trillions.
     *
     * @param gdpField column text of the form {@code <label> (<digits with commas>)}
     * @return the parenthesised figure divided by one trillion
     * @throws IllegalArgumentException if the column has no second space-separated
     *                                  token or that token is not a valid long
     *                                  (reported as {@link NumberFormatException})
     */
    private static double parseGdpInTrillions(String gdpField) {
        String[] parts = gdpField.split(" ");
        if (parts.length < 2) {
            throw new IllegalArgumentException("GDP column has no parenthesised figure: " + gdpField);
        }
        String digits = parts[1].replace(",", "").replace("(", "").replace(")", "");
        return Long.parseLong(digits) / TRILLION;
    }
}
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

import java.io.IOException;

/**
 * Reducer that totals the GDP values received for each key and writes the
 * total rounded to two decimal places.
 */
public class Gdp5Reducer extends Reducer<Text, DoubleWritable,Text,DoubleWritable> {

    /**
     * Sums every value for {@code key} and emits {@code (key, sum rounded to 2 decimals)}.
     *
     * @param key     grouping key passed through unchanged
     * @param values  GDP values emitted for this key
     * @param context task context used to write the result
     * @throws IOException          if writing the output fails
     * @throws InterruptedException if the write is interrupted
     */
    @Override
    protected void reduce(Text key, Iterable<DoubleWritable> values, Context context) throws IOException, InterruptedException {
        double sum = 0;
        for (DoubleWritable value : values) {
            // Accumulate; plain assignment would keep only the last value seen.
            sum += value.get();
        }

        // Locale.ROOT guarantees a '.' decimal separator so the text parses back
        // regardless of the JVM's default locale.
        String formatted = String.format(java.util.Locale.ROOT, "%.2f", sum);
        context.write(key, new DoubleWritable(Double.parseDouble(formatted)));
    }
}
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;

import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import java.io.FileOutputStream;
import java.io.IOException;

/**
 * Driver that configures and submits the "gdp5" job, wiring
 * {@link Gdp5Mapper} and {@link Gdp5Reducer} together.
 */
public class Gdp5Runner {

    /** Exit status used when the command line is incomplete. */
    private static final int EXIT_USAGE = 2;

    /**
     * Runs the job and terminates the JVM with its status.
     *
     * <p>Exits with {@code 0} when the job completes successfully, {@code 1}
     * when it does not, and {@code 2} when fewer than two arguments are given.
     *
     * @param args {@code args[0]} is the input path, {@code args[1]} the output path
     * @throws IOException            if the job cannot be created or submitted
     * @throws ClassNotFoundException if a job class cannot be loaded
     * @throws InterruptedException   if waiting for the job is interrupted
     */
    public static  void  main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        // Fail with a readable message instead of an ArrayIndexOutOfBoundsException.
        if (args == null || args.length < 2) {
            System.err.println("Usage: Gdp5Runner <input path> <output path>");
            System.exit(EXIT_USAGE);
            return;
        }

        Configuration conf = new Configuration();
        // Create the job.
        Job job = Job.getInstance(conf, "gdp5");

        // Input and output locations.
        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        // Classes that make up the job.
        job.setJarByClass(Gdp5Runner.class);
        job.setMapperClass(Gdp5Mapper.class);
        job.setReducerClass(Gdp5Reducer.class);

        // Output key/value types.
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(DoubleWritable.class);

        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}

# Load a tab-separated, header-less two-column result file and draw it as a bar chart.
# NOTE(review): the column names and chart labels below refer to continents, whereas
# the MapReduce job in this file emits year -> GDP; confirm which output this plots.
import pandas as pd

df = pd.read_csv(r"E:\output1\part-r-00000", sep='\t', header=None)
df.columns = ['大洲名称', '总GDP数']
df.head()

import matplotlib
import matplotlib.pyplot as plt

# Use the SimHei font so the Chinese labels are rendered.
matplotlib.rcParams['font.family'] = 'SimHei'
matplotlib.rcParams['font.sans-serif'] = ['SimHei']

labels = df['大洲名称']
totals = df['总GDP数']

fig, ax = plt.subplots()
ax.bar(labels, totals, width=0.5, color='g', label='GDP')
ax.set_xlabel('大洲名称')
ax.set_ylabel('总GDP数(万亿)')
ax.set_title('各大洲总GDP图')
ax.legend(fontsize=12)
plt.show()

 

 
posted @ 2022-09-13 14:13  aq阿桂  阅读(287)  评论(0)    收藏  举报