51JOB Website: Job Classification, Data Analysis and Visualization

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

import java.io.IOException;

public class Job53Mapper extends Mapper<LongWritable,Text,Text, IntWritable> {
    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        // Each input record is one line of text; convert it to a String
        String line = value.toString();
        // Split the tab-separated fields, e.g. [大数据开发工程师  上海吉祥航空股份有限公司  上海]
        String[] arr = line.split("\t");
        String position = "";
        if (arr.length > 1) {
            String post = arr[0];
            // Check 运维/架构 before the generic 开发/工程师 branch; otherwise any
            // title containing "工程师" (e.g. 运维工程师) would be misclassified
            // as 开发工程师 and those branches could never match
            if (post.contains("运维")) {
                position = "运维工程师";
            } else if (post.contains("架构")) {
                position = "架构师";
            } else if (post.contains("开发") || post.contains("工程师")) {
                position = "开发工程师";
            } else if (post.contains("分析") || post.contains("数据")) {
                position = "数据分析师";
            } else if (post.contains("运营")) {
                position = "运营人员";
            } else if (post.contains("产品")) {
                position = "产品经理";
            } else {
                position = "其他";
            }
            // Emit <category, 1> for each posting
            context.write(new Text(position), new IntWritable(1));
        }
    }
}
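
Before submitting the job to the cluster, the keyword rules are worth a quick local check. The sketch below (Python, not part of the MapReduce code) mirrors the mapper's branch order on a few made-up titles:

# Local sanity check for the keyword classification used in Job53Mapper.
# The titles below are hypothetical examples, not real 51JOB data.
RULES = [
    (("运维",), "运维工程师"),
    (("架构",), "架构师"),
    (("开发", "工程师"), "开发工程师"),
    (("分析", "数据"), "数据分析师"),
    (("运营",), "运营人员"),
    (("产品",), "产品经理"),
]

def classify(post):
    # First matching rule wins, mirroring the if/else chain in the mapper
    for keywords, category in RULES:
        if any(k in post for k in keywords):
            return category
    return "其他"

for title in ["大数据开发工程师", "运维工程师", "数据分析师", "产品经理"]:
    print(title, "->", classify(title))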
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

import java.io.IOException;

// Input: the mapper's output, e.g. <开发工程师,1> <数据分析师,1> <开发工程师,1>
// After the shuffle, values are grouped by key: <开发工程师,<1,1>>
// Reducer output: the total per category, e.g. <开发工程师,2300>
public class Job53Reducer extends Reducer<Text,IntWritable,Text,IntWritable> {

    @Override
    protected void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
        int sum = 0;   // running total of postings for this job category
        for (IntWritable i : values) {
            sum += i.get();   // i.get() unwraps the IntWritable to an int
        }
        context.write(key, new IntWritable(sum));  // emit <category, total>
    }
}
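
To make the comments above concrete, here is a minimal Python sketch of what the shuffle and reduce do with a handful of hypothetical <category, 1> pairs:

from collections import defaultdict

# Simulate the shuffle: group the <category, 1> pairs emitted by the mapper
pairs = [("开发工程师", 1), ("数据分析师", 1), ("开发工程师", 1)]
grouped = defaultdict(list)
for k, v in pairs:
    grouped[k].append(v)      # <开发工程师, [1, 1]>, <数据分析师, [1]>

# Simulate the reduce: sum each group, like Job53Reducer
for k, vs in grouped.items():
    print(k, sum(vs))         # 开发工程师 2, 数据分析师 1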
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import java.io.IOException;

public class Job53Runner {
    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        Configuration conf = new Configuration();
        // Create the job
        Job job = Job.getInstance(conf, "job53");
        // Input path (raw data) and output path (must not exist yet) from the command line
        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        // Wire up the driver, mapper, and reducer classes
        job.setJarByClass(Job53Runner.class);
        job.setMapperClass(Job53Mapper.class);
        job.setReducerClass(Job53Reducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
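
Assuming the three classes are packaged into a jar (the jar name and HDFS paths below are examples, not from the original post), the job is submitted with the standard hadoop jar command:

hadoop jar job53.jar Job53Runner /user/hadoop/51job_posts /user/hadoop/output3

The output directory must not already exist; Hadoop creates it and writes the per-category totals to part-r-00000.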
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt

# Use a font with CJK glyphs (SimHei) so the Chinese labels render correctly
matplotlib.rcParams['font.sans-serif'] = ['SimHei']
matplotlib.rcParams['axes.unicode_minus'] = False  # SimHei lacks the Unicode minus glyph

# Load the reducer output (tab-separated <category, count>) into a DataFrame
data = pd.read_csv(r"E:\output3\part-r-00000", sep='\t', header=None, names=['职位名称', '职位数'])
data  # preview the table (displays when run in a notebook)

# Pie chart of each job category's share of all postings
plt.figure(figsize=(10, 8))
plt.pie(data['职位数'], labels=data['职位名称'], autopct='%3.2f%%')
plt.title("职位分布占比图")
plt.legend(loc='upper left', bbox_to_anchor=(-0.5, 1.0))
plt.show()
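
Two optional tweaks, assuming the same data DataFrame as above: sort the slices so the largest category comes first, and save the chart to a file (the filename here is just an example):

# Optional: sort categories so pie slices run from largest to smallest,
# then save the chart; the output filename is a hypothetical example
sorted_data = data.sort_values('职位数', ascending=False)
plt.figure(figsize=(10, 8))
plt.pie(sorted_data['职位数'], labels=sorted_data['职位名称'], autopct='%3.2f%%')
plt.title("职位分布占比图")
plt.savefig("job_distribution.png", dpi=150, bbox_inches='tight')
plt.show()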

 
