1 package com.mengyao.hadoop.mapreduce;
2
import java.io.IOException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import java.util.Vector;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Counter;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
23
24 public class MapJoinApp extends Configured implements Tool {
25
26 static class MapJoinMapper extends Mapper<LongWritable, Text, Text, Text> {
27
28 private Text k;
29 private Text v;
30
31 @Override
32 protected void setup(
33 Mapper<LongWritable, Text, Text, Text>.Context context)
34 throws IOException, InterruptedException {
35 k = new Text();
36 v = new Text();
37 }
38
39 @Override
40 protected void map(LongWritable key, Text value, Context context)
41 throws IOException, InterruptedException {
42 Counter errorLineCounter = context.getCounter("ErrorTotal", "errorLine");
43 Counter notFindCounter = context.getCounter("ErrorTotal", "notFind");
44 FileSplit inputSplit = (FileSplit) context.getInputSplit();
45 String fileName = inputSplit.getPath().getName();
46 String userAddressTable = "userAddressTable.txt";
47 String userTable = "userTable.txt";
48 if (fileName != null && !fileName.isEmpty()) {
49 if (fileName.equals(userAddressTable)) {
50 String[] fields = value.toString().split("\t");
51 String addressId = fields[0];
52 String address = fields[1];
53 k.set(addressId);
54 v.set("ua@"+address);
55 context.write(k, v);
56 System.err.println(k.toString()+"\t"+v.toString());
57 } else if(fileName.equals(userTable)) {
58 String[] fields = value.toString().split("\t");
59 String id = fields[0];
60 String name = fields[1];
61 String addressId = fields[2];
62 k.set(addressId);
63 v.set("u@"+id+"\t"+name);
64 System.err.println(k.toString()+"\t"+v.toString());
65 context.write(k, v);
66 } else {
67 notFindCounter.increment(1L);
68 }
69 } else {
70 errorLineCounter.increment(1L);
71 }
72 }
73 }
74
75 static class MapJoinReducer extends Reducer<Text, Text, NullWritable, Text> {
76 @Override
77 protected void reduce(Text key, Iterable<Text> value, Context context)
78 throws IOException, InterruptedException {
79 Vector<String> uArr = new Vector<String>();
80 Vector<String> uaArr = new Vector<String>();
81 for (Text text : value) {
82 String item = text.toString();
83 if (item.contains("ua@")) {
84 uaArr.add(item.split("ua@")[1]);
85 }
86 if (item.contains("u@")) {
87 uArr.add(item.split("u@")[1]);
88 }
89 }
90 int i;
91 for (i = 0; i < uArr.size(); i++) {
92 for (String uaItem : uaArr) {
93 context.write(NullWritable.get(), new Text(uArr.get(i) +"\t"+ uaItem));
94 }
95 }
96
97 }
98 }
99
100 @Override
101 public int run(String[] args) throws Exception {
102 Configuration conf = getConf();
103
104 conf.set("mapreduce.job.jvm.numtasks", "-1");
105 conf.set("mapreduce.map.speculative", "false");
106 conf.set("mapreduce.reduce.speculative", "false");
107 conf.set("mapreduce.map.maxattempts", "4");
108 conf.set("mapreduce.reduce.maxattempts", "4");
109 conf.set("mapreduce.map.skip.maxrecords", "0");
110 Job job = Job.getInstance(conf, MapJoinApp.class.getSimpleName());
111 job.setJarByClass(MapJoinApp.class);
112
113 FileInputFormat.addInputPath(job, new Path(args[0]));
114 FileOutputFormat.setOutputPath(job, new Path(args[1]));
115
116 job.setMapperClass(MapJoinMapper.class);
117 job.setMapOutputKeyClass(Text.class);
118 job.setMapOutputValueClass(Text.class);
119
120 // job.setCombinerClass(JoinReducer.class);
121
122 job.setReducerClass(MapJoinReducer.class);
123 job.setOutputKeyClass(NullWritable.class);
124 job.setOutputValueClass(Text.class);
125
126 return job.waitForCompletion(true)?0:1;
127 }
128
129 public static int createJob(String[] params){
130 Configuration conf = new Configuration();
131 int status = 1;
132 try {
133 status = ToolRunner.run(conf, new MapJoinApp(), params);
134 } catch (Exception e) {
135 e.printStackTrace();
136 throw new RuntimeException(e);
137 }
138
139 return status;
140 }
141
142 public static void main(String[] args) {
143 args = new String[2];
144 args[0] = "/testdata/user*";
145 args[1] = "/job/mapreduce/"+MapJoinApp.class.getSimpleName()+"_"+new SimpleDateFormat("yyyyMMddhhMMss").format(new Date());
146 if (args.length != 2) {
147 System.out.println("Usage: "+MapJoinApp.class.getSimpleName()+" <in> <out>");
148 System.exit(2);
149 } else {
150 int status = createJob(args);
151 System.exit(status);
152 }
153 }
154
155
156 }