MapReduce 多路径输出

// Configure the "getPairs&countDF" job (old org.apache.hadoop.mapred API).
// `input` and `output` are supplied by the surrounding code, which is not shown here.
  JobConf conf = new JobConf(TermDedication.class);
  conf.setJobName("getPairs&countDF");

  // Remove any output left over from a previous run (recursive delete).
  // The file system is resolved from the output path itself rather than via
  // FileSystem.get(conf): the latter always returns the default file system and
  // rejects paths that live on a different scheme/authority.
  FileSystem fs = output.getFileSystem(conf);
  if (fs.exists(output)) {
    fs.delete(output, true);
  }

  // Plain text lines in; PDFOutputFormat chooses the output sub-directory per record.
  conf.setInputFormat(TextInputFormat.class);
  conf.setOutputFormat(PDFOutputFormat.class);

  // Intermediate (map output) key/value types.
  conf.setMapOutputKeyClass(Text.class);
  conf.setMapOutputValueClass(Text.class);

  // Final (job output) key/value types.
  conf.setOutputKeyClass(Text.class);
  conf.setOutputValueClass(Text.class);

  // `output` is the base directory under which the per-key sub-directories are created.
  FileInputFormat.setInputPaths(conf, input);
  FileOutputFormat.setOutputPath(conf, output);

 

 

 

public class PDFOutputFormat<K extends Text, V extends Text> extends MultipleOutputFormat<K, V> {

 private TextOutputFormat<K, V> theTextOutputFormat = null;
 @Override
 protected RecordWriter<K, V> getBaseRecordWriter(FileSystem fs, JobConf job, String name, Progressable arg3) throws IOException {
  if (theTextOutputFormat == null) {
   theTextOutputFormat = new TextOutputFormat<K, V>();
  }
  return theTextOutputFormat.getRecordWriter(fs, job, name, arg3);
 }

 @Override
 protected String generateFileNameForKeyValue(K key, V value, String name) {
  String temp = key.toString();
  if(temp.startsWith("T:"))
   return "DF/"+name;
  else
   return "Pairs/"+name;
  
 }

}

如果 conf 中配置的输出路径为 path，那么最终的多路径输出为 path/DF/part-00000 和 path/Pairs/part-00000。

posted on 2012-12-27 17:32  风花血月  阅读(205)  评论(0)    收藏  举报

导航