1 package seven.ili.patent;
2
3 import java.io.IOException;
4
5 import org.apache.hadoop.conf.Configuration;
6 import org.apache.hadoop.conf.Configured;
7 import org.apache.hadoop.fs.Path;
8 import org.apache.hadoop.io.IntWritable;
9 import org.apache.hadoop.io.LongWritable;
10 import org.apache.hadoop.io.Text;
11 import org.apache.hadoop.mapreduce.Job;
12 import org.apache.hadoop.mapreduce.Mapper;
13 import org.apache.hadoop.mapreduce.Partitioner;
14 import org.apache.hadoop.mapreduce.Reducer;
15 import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
16 import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
17 import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
18 import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
19 import org.apache.hadoop.util.Tool;
20 import org.apache.hadoop.util.ToolRunner;
21
22 public class AgeStatistics extends Configured implements Tool {
23 public static class MapClass extends Mapper<LongWritable, Text, Text, Text> {
24 public void map(LongWritable key, Text value, Context context)
25 throws IOException, InterruptedException {
26
27 String[] str = value.toString().split("\t", -2);
28 String gender = str[2];
29 context.write(new Text(gender), new Text(value));
30 }
31 }
32
33 public static class Reduce extends Reducer<Text, Text, Text, IntWritable> {
34 public int max = -1;
35 public void reduce(Text key, Iterable<Text> values, Context context)
36 throws IOException, InterruptedException {
37 max = -1;
38 for (Text val : values) {
39 String[] str = val.toString().split("\t", -2);
40 if (Integer.parseInt(str[3]) > max)
41 max = Integer.parseInt(str[3]);
42 }
43 context.write(new Text(key), new IntWritable(max));
44 }
45 }
46
47 public static class AgePartitioner extends Partitioner<Text, Text>{
48 @Override
49 public int getPartition(Text key, Text value, int numReduceTasks) {
50 String[] str = value.toString().split("\t");
51 int age = Integer.parseInt(str[1]);
52 if (numReduceTasks == 0){
53 return 0;
54 }
55 if (age <= 20)
56 return 0;
57 else if (age > 20 && age <= 50)
58 return 1 % numReduceTasks;
59 else
60 return 2 % numReduceTasks;
61 }
62 }
63
64
65 public int run(String[] args) throws Exception {
66 Configuration conf = getConf();
67 Job job = new Job(conf, "TopKNum");
68 job.setJarByClass(AgeStatistics.class);
69 FileInputFormat.setInputPaths(job, new Path(args[0]));
70 FileOutputFormat.setOutputPath(job, new Path(args[1]));
71 job.setMapperClass(MapClass.class);
72 job.setMapOutputKeyClass(Text.class);
73 job.setMapOutputValueClass(Text.class);
74 //job.setCombinerClass(Reduce.class);
75 job.setPartitionerClass(AgePartitioner.class);
76 job.setReducerClass(Reduce.class);
77 job.setNumReduceTasks(3);
78 job.setInputFormatClass(TextInputFormat.class);
79 job.setOutputFormatClass(TextOutputFormat.class);
80 job.setOutputKeyClass(Text.class);
81 job.setOutputValueClass(IntWritable.class);
82 System.exit(job.waitForCompletion(true) ? 0 : 1);
83 return 0;
84 }
85 public static void main(String[] args) throws Exception {
86 int res = ToolRunner.run(new Configuration(), new AgeStatistics(), args);
87 System.exit(res);
88 }
89
90 }