import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import java.io.IOException;
/**
 * Mapper that turns one tab-separated input line into a (continent, GDP) pair.
 *
 * <p>Column index 1 of the line is used as the output key. Column index 2 is
 * split on single spaces; the second token has commas and parentheses removed,
 * is parsed as a double and is multiplied by {@link #GDP_SCALE} before being
 * emitted as the output value.
 *
 * <p>Lines with fewer than two columns or with an empty key column are skipped
 * silently. Lines whose GDP column is missing, has no second token, or does not
 * parse as a number are skipped and counted in the {@value #COUNTER_GROUP}
 * counter group instead of failing the task.
 */
public class GDP4Mapper extends Mapper<LongWritable, Text,Text, DoubleWritable> {

    /** Factor applied to the parsed figure (presumably converts to trillions; confirm against the data). */
    private static final double GDP_SCALE = 0.000000000001;

    /** Counter group under which skipped records are reported. */
    private static final String COUNTER_GROUP = "GDP4Mapper";

    /** Counter name for records whose GDP column could not be used. */
    private static final String MALFORMED_COUNTER = "MALFORMED_GDP_RECORDS";

    // Reused across map() calls; context.write() serializes the contents immediately.
    private final Text outKey = new Text();
    private final DoubleWritable outValue = new DoubleWritable();

    /**
     * Parses one input line and emits its continent with the scaled GDP figure.
     *
     * @param key     input key supplied by the framework (not used)
     * @param value   one tab-separated input line
     * @param context task context used to emit the pair and to update counters
     * @throws IOException          if writing the output pair fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        String[] columns = value.toString().split("\t");
        if (columns.length <= 1) {
            return;
        }

        String continent = columns[1];
        if (continent.isEmpty()) {
            return;
        }

        // The GDP column is required from here on; a two-column line has none.
        if (columns.length < 3) {
            context.getCounter(COUNTER_GROUP, MALFORMED_COUNTER).increment(1);
            return;
        }

        // The figure is the second space-separated token of the GDP column.
        String[] gdpTokens = columns[2].split(" ");
        if (gdpTokens.length < 2) {
            context.getCounter(COUNTER_GROUP, MALFORMED_COUNTER).increment(1);
            return;
        }

        String digits = gdpTokens[1].replace(",", "").replace("(", "").replace(")", "");
        double scaledGdp;
        try {
            scaledGdp = Double.parseDouble(digits) * GDP_SCALE;
        } catch (NumberFormatException malformed) {
            // Header rows or non-numeric figures: skip the record but keep it visible via the counter.
            context.getCounter(COUNTER_GROUP, MALFORMED_COUNTER).increment(1);
            return;
        }

        outKey.set(continent);
        outValue.set(scaledGdp);
        context.write(outKey, outValue);
    }
}
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapreduce.Reducer;
import java.io.IOException;
/**
 * Reducer that adds up every value received for a key and emits the key with
 * that total.
 */
public class GDP4Reducer extends Reducer<Text,DoubleWritable,Text, DoubleWritable> {

    /**
     * Sums the values of one key and writes a single (key, sum) pair.
     *
     * @param key     the key shared by all values in this call
     * @param values  the values grouped under {@code key}
     * @param context task context used to emit the result
     * @throws IOException          if writing the output pair fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    @Override
    protected void reduce(Text key, Iterable<DoubleWritable> values, Context context) throws IOException, InterruptedException {
        // Running total of the values seen so far for this key.
        double total = 0;
        java.util.Iterator<DoubleWritable> cursor = values.iterator();
        while (cursor.hasNext()) {
            // get() unwraps the DoubleWritable into a primitive double.
            total = total + cursor.next().get();
        }
        // Emit the reducer's result for this key.
        DoubleWritable result = new DoubleWritable(total);
        context.write(key, result);
    }
}
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import java.io.FileOutputStream;
import java.io.IOException;
/**
 * Driver that configures and submits the "gdp4" MapReduce job, wiring
 * {@code GDP4Mapper} and {@code GDP4Reducer} together.
 */
public class GDP4Runner {

    /**
     * Submits the job and exits the JVM with the job's status.
     *
     * <p>Exit codes: 0 when the job completes successfully, 1 when it fails,
     * 2 when the required arguments are missing.
     *
     * @param args {@code args[0]} is the input path, {@code args[1]} is the
     *             output path; any further arguments are ignored
     * @throws IOException            if the job cannot be created or submitted
     * @throws ClassNotFoundException if a configured job class cannot be loaded
     * @throws InterruptedException   if waiting for job completion is interrupted
     */
    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        // Fail with a readable message rather than an ArrayIndexOutOfBoundsException.
        if (args == null || args.length < 2) {
            System.err.println("Usage: GDP4Runner <input path> <output path>");
            System.exit(2);
            return;
        }

        Configuration conf = new Configuration();

        // Create the job.
        Job job = Job.getInstance(conf, "gdp4");

        // Set the input and output paths.
        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        // Set the classes that make up the job.
        job.setJarByClass(GDP4Runner.class);
        job.setMapperClass(GDP4Mapper.class);
        job.setReducerClass(GDP4Reducer.class);

        // Only the final output types are set explicitly here; no separate
        // map-output types are configured.
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(DoubleWritable.class);

        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
# Select the SimHei font for all text; the axis labels and title below are Chinese.
matplotlib.rcParams.update({
    'font.family': 'SimHei',
    'font.sans-serif': ['SimHei'],
})

# Load the tab-separated, header-less two-column file and name its columns.
result_path = r"D:\oc\shiyun1\技能抽查模块3数据源\inputgdp\output4\part-r-00000"
data = pd.read_csv(result_path, sep='\t', header=None)
data.columns = ['大洲名称', '总GDP数']
data  # bare expression: displays the frame only in an interactive session

# Draw one green bar per row: first column on the x axis, second column as height.
fig, ax = plt.subplots(figsize=(10, 6))
ax.bar(data['大洲名称'], data['总GDP数'], color='g', width=0.5, label='总GDP数')
ax.set_xlabel('大洲名称')
ax.set_ylabel('总GDP数')
ax.set_title('各大洲总GDP图')
ax.legend(fontsize=12)
plt.show()