猫眼电影网站电影_评分数_分析与可视化

import org.apache.hadoop.io.*;
import org.apache.hadoop.mapreduce.Mapper;

import java.io.IOException;
import java.util.Scanner;


/**
 * Mapper that turns each movie record into a (year, rating) pair.
 *
 * <p>The input line is split on the NUL character, the field at index 2 is parsed as the
 * rating, and the last four characters of the whole line are used as the year key.
 * Records that are too short, or whose rating field is not a number, are skipped and
 * counted in job counters instead of failing the task.
 */
public class Movie7Mapper extends Mapper<LongWritable,Text,Text, DoubleWritable> {

    /** Index of the rating field within a split record. */
    private static final int RATING_INDEX = 2;

    /** Number of trailing characters of the line that are taken as the year key. */
    private static final int YEAR_LENGTH = 4;

    /** Counter group under which skipped records are reported. */
    private static final String COUNTER_GROUP = "Movie7Mapper";

    /**
     * Emits the record's year and rating, or skips the record if it cannot be parsed.
     *
     * @param key     input key supplied by the framework (not used here)
     * @param value   one line of input text
     * @param context task context used to emit output and update counters
     * @throws IOException          if writing the output pair fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        // Sample record:
        // [0 芳华 9.1 http://maoyan.com/films/1170264 剧情,爱情,战争 中国大陆 大陆上映 136 2017]
        String line = value.toString();

        // "\00" is an octal escape for the NUL character (U+0000).
        // NOTE(review): confirm the input really is NUL-delimited; the sample above is
        // displayed with spaces between fields.
        String[] fields = line.split("\00");

        // Blank or truncated lines would otherwise make substring() below throw.
        if (fields.length <= RATING_INDEX || line.length() < YEAR_LENGTH) {
            context.getCounter(COUNTER_GROUP, "SKIPPED_SHORT_RECORD").increment(1);
            return;
        }

        double mark;
        try {
            mark = Double.parseDouble(fields[RATING_INDEX]);
        } catch (NumberFormatException ignored) {
            // A non-numeric rating (e.g. a header row) should not abort the whole job.
            context.getCounter(COUNTER_GROUP, "SKIPPED_BAD_RATING").increment(1);
            return;
        }

        // The year is read from the end of the line rather than from a split field.
        String year = line.substring(line.length() - YEAR_LENGTH);
        context.write(new Text(year), new DoubleWritable(mark));
    }
}
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

import java.io.IOException;
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
import java.io.IOException;

/**
 * Reducer that averages all ratings received for one key and emits the mean rounded to
 * two decimal places.
 */
public class Movie7Reducer extends Reducer<Text, DoubleWritable,Text,DoubleWritable> {

    /**
     * Computes the arithmetic mean of {@code values} and writes it under {@code key}.
     *
     * @param key     the grouping key (passed through unchanged to the output)
     * @param values  the ratings grouped under {@code key}
     * @param context task context used to emit the result
     * @throws IOException          if writing the output pair fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    @Override
    protected void reduce(Text key, Iterable<DoubleWritable> values, Context context)
            throws IOException, InterruptedException {
        double sum = 0;
        long count = 0;
        for (DoubleWritable rating : values) {
            sum += rating.get();
            count++;
        }

        // Format with Locale.ROOT so the decimal separator is always '.'; with the default
        // locale a comma separator (e.g. "9,10") would make Double.parseDouble throw.
        String rounded = String.format(java.util.Locale.ROOT, "%.2f", sum / count);
        double avgMark = Double.parseDouble(rounded);
        context.write(key, new DoubleWritable(avgMark));
    }
}
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;

import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import java.io.FileOutputStream;
import java.io.IOException;

/**
 * Command-line entry point that configures and submits the job wiring
 * {@code Movie7Mapper} to {@code Movie7Reducer}.
 */
public class Movie7Runner {

    /**
     * Builds the job, runs it to completion and exits with 0 on success or 1 on failure.
     *
     * @param args {@code args[0]} is the input path, {@code args[1]} is the output path
     * @throws IOException            if the job cannot be created or submitted
     * @throws ClassNotFoundException if a configured job class cannot be loaded
     * @throws InterruptedException   if the wait for job completion is interrupted
     */
    public static void main(String[] args)
            throws IOException, ClassNotFoundException, InterruptedException {
        // Fail with a readable message instead of an ArrayIndexOutOfBoundsException.
        if (args.length < 2) {
            System.err.println("Usage: Movie7Runner <input path> <output path>");
            System.exit(2);
        }

        Configuration conf = new Configuration();
        // Create the job.
        Job job = Job.getInstance(conf, "maoyan");

        // Set the input and output paths.
        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        // Set the classes that make up the job.
        job.setJarByClass(Movie7Runner.class);
        job.setMapperClass(Movie7Mapper.class);
        job.setReducerClass(Movie7Reducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(DoubleWritable.class);

        boolean succeeded = job.waitForCompletion(true);
        System.exit(succeeded ? 0 : 1);
    }
}
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt

# Configure SimHei as the plotting font (the axis labels and title below are Chinese).
for rc_key, rc_value in (('font.family', 'SimHei'), ('font.sans-serif', ['SimHei'])):
    matplotlib.rcParams[rc_key] = rc_value

# Load the tab-separated, header-less job output and name its two columns.
df = pd.read_csv(r"E:\output7\part-r-00000", sep='\t', header=None)
df.columns = ['年份', '电影评分均值']

# Draw the mean rating per year as a single line chart.
fig, ax = plt.subplots(figsize=(9, 6))
ax.plot(df['年份'], df['电影评分均值'], label='均值')
ax.set_xlabel('上映时间')
ax.set_ylabel("评分均值")
# Restrict the visible x range to 1980-2017.
ax.set_xlim(1980, 2017)
ax.legend()
ax.set_title("每年上映电影评分均值趋势表")
plt.show()

 

posted @ 2022-08-31 09:59  aq阿桂  阅读(68)  评论(0)    收藏  举报