猫眼电影网站出品_电影数_分析与可视化

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapreduce.Mapper;

import java.io.IOException;


/**
 * Mapper that emits one {@code (country, 1)} pair for each input record that
 * carries a non-empty country field.
 *
 * <p>Each input line is split on the NUL character; the field at index
 * {@link #COUNTRY_FIELD_INDEX} is treated as the country. When that field
 * lists several comma-separated countries, only the first one is counted.
 * Records with too few fields, or whose (first) country is empty, are skipped.
 */
public class Movie9Mapper extends Mapper<LongWritable,Text,Text,IntWritable> {

    /** Zero-based position of the country field within a split record. */
    private static final int COUNTRY_FIELD_INDEX = 5;

    /** Count emitted for every accepted record; its value never changes. */
    private static final IntWritable ONE = new IntWritable(1);

    /** Output key reused across calls (as in the Hadoop WordCount example) to avoid a per-record allocation. */
    private final Text countryKey = new Text();

    /**
     * Extracts the first country of one record and writes {@code (country, 1)}.
     *
     * @param key     input key supplied by the framework; not used here
     * @param value   one input line
     * @param context sink for the emitted {@code (country, 1)} pair
     * @throws IOException          if writing to the context fails
     * @throws InterruptedException if writing to the context is interrupted
     */
    @Override
    protected void map(LongWritable key,Text value,Context context) throws IOException,InterruptedException{
        // Sample record: [0 芳华 9.1 http://maoyan.com/films/1170264 剧情,爱情,战争 中国大陆 大陆上映 136 2017]
        // "\00" is a Java octal escape, so the delimiter is the NUL character (code point 0).
        // NOTE(review): the sample above looks space-separated; confirm the input really uses NUL.
        String[] fields = value.toString().split("\00");
        if (fields.length <= COUNTRY_FIELD_INDEX) {
            return;
        }

        String country = fields[COUNTRY_FIELD_INDEX];

        // Keep only the text before the first comma. substring() is used rather than
        // split(",")[0] because split() returns an empty array for a field made up
        // solely of commas, which would make the [0] access throw.
        int comma = country.indexOf(',');
        if (comma >= 0) {
            country = country.substring(0, comma);
        }

        // Checked after the comma handling so that fields like ",X" do not emit an empty key.
        if (country.isEmpty()) {
            return;
        }

        countryKey.set(country);
        context.write(countryKey, ONE);
    }
}
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

import java.io.IOException;

/**
 * Reducer that adds up all counts received for a key and writes a single
 * {@code (key, total)} pair.
 */
public class Movie9Reducer extends Reducer<Text, IntWritable,Text,IntWritable> {

    /**
     * Writes the key together with the sum of its values.
     *
     * @param key     the key being reduced
     * @param values  the counts grouped under {@code key}
     * @param context sink for the {@code (key, total)} pair
     * @throws IOException          if writing to the context fails
     * @throws InterruptedException if writing to the context is interrupted
     */
    @Override
    protected void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
        final IntWritable total = new IntWritable(sumOf(values));
        context.write(key, total);
    }

    /** Returns the sum of the wrapped int values, or 0 when there are none. */
    private static int sumOf(Iterable<IntWritable> counts) {
        int running = 0;
        for (IntWritable count : counts) {
            running = running + count.get();
        }
        return running;
    }
}
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import java.io.FileOutputStream;
import java.io.IOException;

/**
 * Driver that configures and submits the MapReduce job built from
 * {@code Movie9Mapper} and {@code Movie9Reducer}.
 */
public class Movie9Runner {
    /**
     * Configures the job, runs it, and exits the JVM with a status code.
     *
     * <p>Exit status: 0 when the job completes successfully, 1 when it fails,
     * 2 when the required arguments are missing.
     *
     * @param args {@code args[0]} is the input path, {@code args[1]} is the
     *             output path; any further arguments are ignored
     * @throws IOException            if job setup or submission fails
     * @throws ClassNotFoundException if a job class cannot be found
     * @throws InterruptedException   if waiting for the job is interrupted
     */
    public static  void  main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        // Fail with a readable message instead of an ArrayIndexOutOfBoundsException
        // when the paths are not supplied.
        if (args.length < 2) {
            System.err.println("Usage: Movie9Runner <input path> <output path>");
            System.exit(2);
        }

        Configuration conf=new Configuration();
        // Create the job.
        Job job= Job.getInstance(conf,"maoyan");
        // Set the input and output paths.
        FileInputFormat.addInputPath(job,new Path(args[0]));
        FileOutputFormat.setOutputPath(job,new Path(args[1]));
        // Set the classes that make up the job.
        job.setJarByClass(Movie9Runner.class);
        job.setMapperClass(Movie9Mapper.class);
        job.setReducerClass(Movie9Reducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        // Block until the job finishes and report its outcome through the exit status.
        System.exit(job.waitForCompletion(true)?0:1);

    }
}
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt

# Select the SimHei font for both the default family and the sans-serif list
# (presumably so the Chinese labels used below render correctly).
matplotlib.rcParams.update({
    'font.family': 'SimHei',
    'font.sans-serif': ['SimHei'],
})

# Column labels assigned to the two tab-separated columns of the input file.
country_col = '国家'
count_col = '出品电影数'

# Load the headerless, tab-separated file and label its columns.
data = pd.read_csv(r"E:\output9\part-r-00000", sep='\t', header=None)
data.columns = [country_col, count_col]
data.head()  # preview of the first rows; only displayed in an interactive session

# Keep the ten rows with the largest counts, in descending order.
data = data.sort_values(by=count_col, ascending=False).head(10)

# Draw one bar per remaining row, then label and show the chart.
plt.bar(data[country_col], data[count_col], width=0.5, label='数量')
plt.xlabel('国家名称')
plt.ylabel('上映电影出品数目')
plt.title('出品电影数前十的国家图表')
plt.legend(fontsize=12)
plt.show()

 

posted @ 2022-08-31 10:07  aq阿桂  阅读(96)  评论(0)    收藏  举报