51JOB网站大数据_岗位数据_分析与可视化

Job51Mapper

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

import java.io.IOException;


/**
 * Mapper of the 51JOB posting-count job: emits {@code <city, 1>} for every input line.
 *
 * <p>Input key: the position of the line as supplied by the input format (with Hadoop's
 * default text input format this is the byte offset of the line, not a line number).
 * Input value: one line of text whose columns are separated by tabs, e.g.
 * {@code jobTitle \t company \t location}.
 *
 * <p>Output: {@code <Shanghai, 1>}, {@code <Suzhou, 1>}, ...
 */
public class Job51Mapper extends Mapper<LongWritable,Text,Text,IntWritable>{

    /** Count emitted for every posting; its value never changes, so one shared instance is enough. */
    private static final IntWritable ONE = new IntWritable(1);

    /** Output key reused across calls; context.write serializes it before the next call overwrites it. */
    private final Text cityKey = new Text();

    /**
     * Extracts the city from the third tab-separated column and emits it with a count of one.
     *
     * <p>Lines with fewer than three columns, or with a blank city, are skipped.
     *
     * @param key     position of the line in the input (unused)
     * @param value   one line of the input file
     * @param context sink for the {@code <city, 1>} pair
     * @throws IOException          if writing the output pair fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        String[] columns = value.toString().split("\\t");
        if (columns.length <= 2) {
            return; // no location column on this line
        }

        // Trim so that "Shanghai" and "Shanghai " are counted under the same key.
        String city = columns[2].trim();

        // A location such as "Shenzhen-Longhua" carries a district after the dash; keep only the city.
        int dash = city.indexOf('-');
        if (dash > 0) {
            city = city.substring(0, dash).trim();
        }

        if (city.isEmpty()) {
            return; // a blank location would otherwise show up as an empty key in the result
        }

        cityKey.set(city);
        context.write(cityKey, ONE);
    }
}
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

import java.io.IOException;

/**
 * Reducer of the 51JOB posting-count job: adds up the counts emitted for one city.
 *
 * <p>Input: the mapper output grouped by key, e.g. {@code <Shanghai, [1, 1, 1]>}.
 * Output: one pair per city holding the sum, e.g. {@code <Shanghai, 3>}.
 */
public class Job51Reducer extends Reducer<Text,IntWritable,Text,IntWritable> {

    /**
     * Writes the total of all counts received for {@code key}.
     *
     * @param key     the city name
     * @param values  the counts collected for that city
     * @param context sink for the {@code <city, total>} pair
     * @throws IOException          if writing the output pair fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    @Override
    protected void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
        context.write(key, new IntWritable(totalOf(values)));
    }

    /** Unwraps every IntWritable to a plain int and returns the sum. */
    private static int totalOf(Iterable<IntWritable> counts) {
        int total = 0;
        for (IntWritable count : counts) {
            total = total + count.get();
        }
        return total;
    }
}
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import java.io.FileOutputStream;
import java.io.IOException;

/**
 * Driver that configures and submits the 51JOB posting-count job.
 *
 * <p>Usage: {@code Job51Runner <input path> <output path>}
 */
public class Job51Runner {

    /**
     * Builds the job from {@link Job51Mapper} and {@link Job51Reducer}, runs it and exits
     * with status 0 on success, 1 on job failure, or 2 when the paths are missing.
     *
     * @param args {@code args[0]} is the input path, {@code args[1]} the output path;
     *             any further arguments are ignored
     * @throws IOException            if the job cannot be created or submitted
     * @throws ClassNotFoundException if a configured job class cannot be loaded
     * @throws InterruptedException   if the wait for job completion is interrupted
     */
    public static  void  main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        // Fail with a readable message instead of an ArrayIndexOutOfBoundsException.
        if (args.length < 2) {
            System.err.println("Usage: Job51Runner <input path> <output path>");
            System.exit(2);
        }

        Configuration conf=new Configuration();
        // Create the job.
        Job job= Job.getInstance(conf,"job51");

        // Input and output locations.
        FileInputFormat.addInputPath(job,new Path(args[0]));
        FileOutputFormat.setOutputPath(job,new Path(args[1]));

        // Classes that make up the job.
        job.setJarByClass(Job51Runner.class);
        job.setMapperClass(Job51Mapper.class);
        // The reducer only sums counts and has identical input/output types, so it can also
        // pre-aggregate on the map side and reduce the amount of data shuffled.
        job.setCombinerClass(Job51Reducer.class);
        job.setReducerClass(Job51Reducer.class);

        // Key/value types of the job output (also used for the map output by default).
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);

        System.exit(job.waitForCompletion(true)?0:1);
    }
}
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt

# Select the SimHei font so the Chinese labels below are rendered by matplotlib.
matplotlib.rcParams['font.family']='SimHei'
matplotlib.rcParams['font.sans-serif'] = ['SimHei']

# The MapReduce result file has no header row: every line is "<city>\t<count>".
# header=None keeps the first line as data (otherwise it is consumed as the header
# and then lost when the column names are assigned).
# The raw string avoids the invalid "\o" / "\p" escape sequences in the Windows path.
data = pd.read_csv(r"E:\output1\part-r-00000", encoding="utf-8", sep='\t',
                   header=None, names=['地区名称','岗位招聘数'])

# The result file is ordered by city name, so sort by count (descending)
# before taking the five cities with the most postings.
data = data.sort_values('岗位招聘数', ascending=False).head(5)

# Bar chart: one bar per city, height = number of postings.
plt.figure(figsize=(10,6))
x=data['地区名称']
y=data['岗位招聘数']
plt.bar(x,y,color='r',width=.3,label='岗位数量')
plt.xlabel('城市名称')
plt.ylabel('岗位数量')
plt.title('岗位招聘数前五的城市')
plt.legend(fontsize=12)
plt.show()

 

posted @ 2022-08-31 08:56  aq阿桂  阅读(180)  评论(0)    收藏  举报