import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapreduce.Mapper;
import java.io.IOException;
/**
 * Mapper that emits {@code (year, 1)} for every input line, where the "year" is the last
 * four characters of the line.
 *
 * <p>The input key (a {@link LongWritable}) is not used; the input value is one line of text.
 * Lines shorter than four characters cannot carry a year and are skipped (and counted in a
 * job counter) instead of failing the task with a {@code StringIndexOutOfBoundsException}.
 */
public class maoyanMapper extends Mapper<LongWritable, Text, Text, IntWritable> {

    /** Number of trailing characters of each line that are used as the output key. */
    private static final int YEAR_LENGTH = 4;

    /** Constant count written with every key; never mutated. */
    private static final IntWritable ONE = new IntWritable(1);

    /** Output key, reused across {@code map} calls to avoid one allocation per record. */
    private final Text yearKey = new Text();

    /**
     * Returns {@code s} with its characters in reverse order.
     *
     * <p>The reversal works on individual {@code char} values (UTF-16 code units), exactly as
     * the previous implementation did. Kept public for backward compatibility; {@code map}
     * no longer needs it.
     *
     * @param s the string to reverse; must not be {@code null}
     * @return a new string containing the characters of {@code s} from last to first
     */
    public static String spiltRtoL(String s) {
        int length = s.length();
        StringBuilder reversed = new StringBuilder(length);
        for (int i = length - 1; i >= 0; i--) {
            reversed.append(s.charAt(i));
        }
        return reversed.toString();
    }

    /**
     * Writes {@code (last four characters of the line, 1)} to the context.
     *
     * @param key     input key, ignored
     * @param value   one line of input text
     * @param context context the output pair is written to
     * @throws IOException          if writing the output pair fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        String line = value.toString();
        int length = line.length();

        // A blank or truncated line has no four-character suffix; skip it rather than
        // letting substring() throw and abort the whole task.
        if (length < YEAR_LENGTH) {
            context.getCounter("maoyanMapper", "SKIPPED_SHORT_LINES").increment(1);
            return;
        }

        // Equivalent to the old reverse -> take first 4 -> reverse sequence.
        yearKey.set(line.substring(length - YEAR_LENGTH));
        context.write(yearKey, ONE);
    }
}
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
import java.io.IOException;
/**
 * Reducer that adds up all {@link IntWritable} values received for a key and writes the
 * key together with that total.
 */
public class maoyanReducer extends Reducer<Text, IntWritable, Text, IntWritable> {

    /**
     * Sums {@code values} and emits {@code (key, sum)}.
     *
     * @param key     the key shared by all {@code values}
     * @param values  the counts to add together
     * @param context context the result pair is written to
     * @throws IOException          if writing the result fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    @Override
    protected void reduce(Text key, Iterable<IntWritable> values, Context context)
            throws IOException, InterruptedException {
        // Accumulate directly in the writable that will be emitted.
        IntWritable total = new IntWritable(0);
        for (IntWritable count : values) {
            total.set(total.get() + count.get());
        }
        context.write(key, total);
    }
}
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import java.io.FileOutputStream;
import java.io.IOException;
/**
 * Driver that configures and submits the "movie6" MapReduce job, which runs
 * {@code maoyanMapper} and {@code maoyanReducer} over the given input path.
 */
public class maoyanRunner {

    /**
     * Configures the job, waits for it to finish and exits the JVM.
     *
     * <p>Exit status is 0 when the job succeeds, 1 when it fails, and 2 when the required
     * command-line arguments are missing.
     *
     * @param args {@code args[0]} is the input path, {@code args[1]} is the output path
     * @throws IOException            if the job cannot be created or submitted
     * @throws ClassNotFoundException if a job class cannot be found
     * @throws InterruptedException   if the thread is interrupted while waiting for the job
     */
    public static void main(String[] args)
            throws IOException, ClassNotFoundException, InterruptedException {
        // Fail with a readable message instead of an ArrayIndexOutOfBoundsException.
        if (args.length < 2) {
            System.err.println("Usage: maoyanRunner <input path> <output path>");
            System.exit(2);
            return;
        }

        Configuration conf = new Configuration();

        // Create the job.
        Job job = Job.getInstance(conf, "movie6");

        // Input and output locations.
        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        // Classes that make up the job.
        job.setJarByClass(maoyanRunner.class);
        job.setMapperClass(maoyanMapper.class);
        // The reducer is a plain integer sum with identical input and output types, so it
        // can also serve as a combiner to pre-aggregate map output; the final result is
        // unchanged.
        job.setCombinerClass(maoyanReducer.class);
        job.setReducerClass(maoyanReducer.class);

        // Output types (also used for the map output, since none are set separately).
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);

        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
# Plot the reducer output (tab-separated "year<TAB>count" rows) as a line chart.

# Select the SimHei font; the axis labels, legend and title below are Chinese text.
matplotlib.rcParams['font.family'] = 'SimHei'
matplotlib.rcParams['font.sans-serif'] = ['SimHei']

# The file has no header row, so the column names are supplied here.
data = pd.read_csv(
    r"E:\output6\part-r-00000",
    sep='\t',
    header=None,
    names=['年份', '电影数目'],
)

plt.figure(figsize=(9, 6))
# Plot count against year. Previously only the counts were passed, so the x axis showed
# the row index (0..n-1) even though it is labelled as years, and the legend called the
# count series "年份" (year).
plt.plot(data['年份'], data['电影数目'], label='电影数目')
plt.xlabel('时间(年)')
plt.ylabel("上映数量")
plt.legend()
plt.title("电影年份对应的上映电影数目趋势表")
plt.show()