import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.net.URI;
import java.util.HashMap;
import java.util.Map;
import java.util.StringTokenizer;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.filecache.DistributedCache;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class WordCount {

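    // TokenizerMapper performs a map-side join: a lookup file shipped via the
    // DistributedCache is loaded into an in-memory HashMap in setup(), and each
    // token of the input is emitted together with the joined lookup values.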
    public static class TokenizerMapper extends
            Mapper<Object, Text, Text, Text> {

        private final Map<String, String> map = new HashMap<String, String>();
        private final Text word = new Text();
        private Path[] words;
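
        // Runs once per mapper before any map() calls: read the cached join
        // file and populate the in-memory lookup map.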
        @Override
        protected void setup(Context context) throws IOException,
                InterruptedException {
            words = DistributedCache.getLocalCacheFiles(context.getConfiguration());
            String path = words[0].toString();
            String line;
            String[] tokens;
            BufferedReader joinReader = new BufferedReader(new FileReader(path + "/456.txt"));
            while ((line = joinReader.readLine()) != null) {
                tokens = line.split(",");
                map.put(tokens[0], tokens[1]);
            }
            joinReader.close();
        }
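
        // Tokenize the input line and emit each token as the key, paired with
        // the concatenation of the lookup values for keys "1" through "5".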
        @Override
        public void map(Object key, Text value, Context context)
                throws IOException, InterruptedException {
            StringTokenizer itr = new StringTokenizer(value.toString());
            while (itr.hasMoreTokens()) {
                word.set(itr.nextToken());
                context.write(word, new Text(
                        map.get("1") + map.get("2") + map.get("3") + map.get("4") + map.get("5")));
            }
        }
    }
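
    // Reducer (also wired in as the combiner, since its input and output types
    // match) concatenates all values seen for a key into a single Text. Despite
    // its name it does no integer summing; it simply joins the strings.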
    public static class IntSumReducer extends Reducer<Text, Text, Text, Text> {

        @Override
        public void reduce(Text key, Iterable<Text> values, Context context)
                throws IOException, InterruptedException {
            StringBuilder url = new StringBuilder();
            for (Text val : values) {
                url.append(val.toString());
            }
            context.write(key, new Text(url.toString()));
        }
    }
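
    // Driver: registers /test/output with the DistributedCache before the Job
    // is created, so the mapper's setup() can read it on every node; setup()
    // expects the localized copy to be a directory containing 456.txt.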
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        DistributedCache.addCacheFile(new URI("/test/output"), conf);
        Job job = new Job(conf, "word count");
        job.setJarByClass(WordCount.class);
        job.setMapperClass(TokenizerMapper.class);
        job.setCombinerClass(IntSumReducer.class);
        job.setReducerClass(IntSumReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);
        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
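
// A minimal invocation sketch (jar name and paths are assumptions, not from the source):
//   hadoop jar wordcount.jar WordCount /input/books /output/joined
// args[0] is the job input directory; args[1] is the output directory, which must not yet exist.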