猫眼电影网站出品_电影数_分析与可视化
import org.apache.hadoop.io.IntWritable; import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.io.Text; import org.apache.hadoop.io.Writable; import org.apache.hadoop.mapreduce.Mapper; import java.io.IOException; public class Movie9Mapper extends Mapper<LongWritable,Text,Text,IntWritable> { @Override protected void map(LongWritable key,Text value,Context context) throws IOException,InterruptedException{ //super.map(key,value,context); String line=value.toString(); //[0 芳华 9.1 http://maoyan.com/films/1170264 剧情,爱情,战争 中国大陆 大陆上映 136 2017] String[] arr=line.split("\00"); String country=null; if (arr.length>5){ country=arr[5]; if (country == null ||"".equals(country)){ return; // }else if (country.indexOf(",")!=-1){ }else if (country.contains(",")){ country=country.split(",")[0]; // System.out.println(country); } context.write(new Text(country),new IntWritable(1)); } } }
import org.apache.hadoop.io.IntWritable; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapreduce.Reducer; import java.io.IOException; public class Movie9Reducer extends Reducer<Text, IntWritable,Text,IntWritable> { @Override protected void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException { //super.reduce(key, values, context); int sum=0; for (IntWritable i:values){ sum+=i.get(); } context.write(key,new IntWritable(sum)); } }
import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.IntWritable; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapreduce.Job; import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; import java.io.FileOutputStream; import java.io.IOException; public class Movie9Runner { public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException { Configuration conf=new Configuration(); //创建job Job job= Job.getInstance(conf,"maoyan"); //设置输入输出路径 FileInputFormat.addInputPath(job,new Path(args[0])); FileOutputFormat.setOutputPath(job,new Path(args[1])); //设置运行类 job.setJarByClass(Movie9Runner.class); job.setMapperClass(Movie9Mapper.class); job.setReducerClass(Movie9Reducer.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(IntWritable.class); System.exit(job.waitForCompletion(true)?0:1); } }
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt

# Use the SimHei font so the Chinese labels below render.
matplotlib.rcParams['font.family'] = 'SimHei'
matplotlib.rcParams['font.sans-serif'] = ['SimHei']

# Load the tab-separated, header-less result file and name its two columns.
data = pd.read_csv(
    r"E:\output9\part-r-00000",
    sep='\t',
    header=None,
)
data.columns = ['国家', '出品电影数']
data.head()  # preview only; the result is discarded when run as a script

# Keep the ten rows with the largest counts, in descending order.
data = data.sort_values(by='出品电影数', ascending=False).head(10)

countries = data['国家']
film_counts = data['出品电影数']

# Draw the bar chart with axis labels, title and legend.
plt.bar(countries, film_counts, width=0.5, label='数量')
plt.xlabel('国家名称')
plt.ylabel('上映电影出品数目')
plt.title('出品电影数前十的国家图表')
plt.legend(fontsize=12)
plt.show()