MapReduce 多路径输出
// Configure the job (old org.apache.hadoop.mapred API). Submitting the job is
// not part of this snippet.
JobConf conf = new JobConf(TermDedication.class);
conf.setJobName("getPairs&countDF");

// Resolve the file system from the output path itself rather than from the
// configuration's default file system, so a fully-qualified output path that
// lives on a different file system is still handled correctly.
FileSystem fs = output.getFileSystem(conf);
// Remove any previous output (recursively) before the output path is registered.
if (fs.exists(output)) {
    fs.delete(output, true);
}

// Plain-text input; output goes through PDFOutputFormat.
conf.setInputFormat(TextInputFormat.class);
conf.setOutputFormat(PDFOutputFormat.class);

// Both the intermediate (map) and the final key/value pairs are Text.
conf.setMapOutputKeyClass(Text.class);
conf.setMapOutputValueClass(Text.class);
conf.setOutputKeyClass(Text.class);
conf.setOutputValueClass(Text.class);

FileInputFormat.setInputPaths(conf, input);
FileOutputFormat.setOutputPath(conf, output);
/**
 * Output format that writes text records into one of two sub-directories,
 * chosen from the record's key.
 *
 * <p>Records whose key text starts with {@code "T:"} are placed under
 * {@code DF/}; every other record is placed under {@code Pairs/}. The actual
 * writing is delegated to a {@link TextOutputFormat}.
 *
 * @param <K> key type, a {@code Text} or subclass
 * @param <V> value type, a {@code Text} or subclass
 */
public class PDFOutputFormat<K extends Text, V extends Text> extends MultipleOutputFormat<K, V> {

    /** Delegate that produces the underlying record writers; created on first use. */
    private TextOutputFormat<K, V> textDelegate = null;

    /**
     * Returns a text record writer for the given file name, creating the
     * delegate output format the first time this method is called.
     *
     * @param fs   file system passed through to the delegate
     * @param job  job configuration passed through to the delegate
     * @param name file name passed through to the delegate
     * @param arg3 progress reporter passed through to the delegate
     * @return the record writer obtained from the delegate
     * @throws IOException if the delegate fails to create the writer
     */
    @Override
    protected RecordWriter<K, V> getBaseRecordWriter(FileSystem fs, JobConf job, String name, Progressable arg3) throws IOException {
        TextOutputFormat<K, V> format = textDelegate;
        if (format == null) {
            format = new TextOutputFormat<K, V>();
            textDelegate = format;
        }
        return format.getRecordWriter(fs, job, name, arg3);
    }

    /**
     * Prefixes the incoming file name with the sub-directory selected by the key.
     *
     * @param key   record key; only its string form is inspected
     * @param value record value (not used for routing)
     * @param name  file name supplied by the caller
     * @return {@code "DF/" + name} when the key starts with {@code "T:"},
     *         otherwise {@code "Pairs/" + name}
     */
    @Override
    protected String generateFileNameForKeyValue(K key, V value, String name) {
        final String subDirectory = key.toString().startsWith("T:") ? "DF/" : "Pairs/";
        return subDirectory + name;
    }
}
如果 conf 中配置的输出路径为 path,那么最终的多路径输出为 path/DF/part-00000 和 path/Pairs/part-00000。
浙公网安备 33010602011771号