HDFS 读取、写入、遍历文件夹获取文件全路径、append
                  					
					版权声明:本文为博主原创文章,未经博主同意不得转载。安金龙 的博客。					https://blog.csdn.net/smile0198/article/details/37573081				
								
								            
						1、从HDFS中读取数据
// Read a text file from HDFS and print it to standard output line by line.
Configuration conf = getConf();
Path path = new Path(pathstr);
FileSystem fs = FileSystem.get(conf);
FSDataInputStream fsin = null;
BufferedReader br = null;
String line;
try {
  fsin = fs.open(path);
  // Decode explicitly instead of relying on the platform default charset.
  // NOTE(review): assumes the file holds UTF-8 text - confirm for your data.
  br = new BufferedReader(new InputStreamReader(fsin, "UTF-8"));
  while ((line = br.readLine()) != null) {
    System.out.println(line);
  }
} finally {
  // br is still null when opening or wrapping the stream failed; closing the
  // reader also closes the HDFS stream it wraps.
  if (br != null) {
    br.close();
  } else if (fsin != null) {
    fsin.close();
  }
}
2、写HDFS
  // Write the text accumulated in sb to a file on HDFS.
  Configuration conf = getConf();
  Path path = new Path(mid_sort);
  FileSystem fs = FileSystem.get(conf);
  // Create the output file at the path built above.
  FSDataOutputStream out = fs.create(path);
  try {
    // Encode explicitly instead of relying on the platform default charset.
    out.write(sb.toString().getBytes("UTF-8"));
  } finally {
    // Always release the stream, even when the write fails.
    out.close();
  }
3、遍历文件夹 获取文件 全路径
/**
  * Lists the files directly inside a folder (sub-folders are not descended into)
  * whose names contain the given text.
  *
  * @param fs file system used to look up the folder
  * @param folderPath folder whose direct children are examined
  * @param pattern text a file name must contain; it is matched with
  *        {@link String#contains}, i.e. as a literal substring and not as a
  *        regular expression. {@code null} accepts every file.
  * @return paths of the matching files; empty when the folder does not exist
  * @throws IOException if the file system cannot be queried
  */
 public static List<Path> getFilesUnderFolder(FileSystem fs, Path folderPath, String pattern) throws IOException {
  List<Path> paths = new ArrayList<Path>();
  if (!fs.exists(folderPath)) {
   return paths;
  }
  FileStatus[] fileStatus = fs.listStatus(folderPath);
  if (fileStatus == null) {
   // Defensive: presumably some FileSystem implementations return null rather
   // than throwing when the folder vanished after the exists() check - TODO confirm.
   return paths;
  }
  for (FileStatus status : fileStatus) {
   if (status.isDir()) {
    continue; // only plain files are wanted
   }
   Path oneFilePath = status.getPath();
   if (pattern == null || oneFilePath.getName().contains(pattern)) {
    paths.add(oneFilePath);
   }
  }
  return paths;
 }
4、追加数据 append
  /**
   * Appends the contents of a local file to an existing HDFS file.
   *
   * @param hdfsFile URI of the HDFS file to append to
   * @param appendFile path of the local file whose bytes are appended
   * @return {@code true} when the copy completed, {@code false} when an
   *         {@link IOException} occurred (its stack trace is printed)
   */
  public static boolean appendRTData(String hdfsFile, String appendFile) {
    boolean flag = false;
    Configuration conf = new Configuration();
    InputStream in = null;
    try {
      FileSystem fs = FileSystem.get(URI.create(hdfsFile), conf);
      in = new BufferedInputStream(new FileInputStream(appendFile));
      OutputStream out = fs.append(new Path(hdfsFile));
      // The final argument (true) asks copyBytes to close both streams when it finishes.
      IOUtils.copyBytes(in, out, 4096, true);
      // Report success; previously the flag was never set, so callers always saw false.
      flag = true;
    } catch (IOException e) {
      e.printStackTrace();
    } finally {
      // Covers the case where fs.append() failed before copyBytes could take
      // ownership of the local stream; closing an already-closed stream is harmless.
      IOUtils.closeStream(in);
    }
    return flag;
  }
***********************************************************************************************************************************************
***********************************************************************************************************************************************
异常信息
1、Exception in thread "main" java.lang.IllegalArgumentException: java.net.UnknownHostException: ns6
原因是没有加载 HDFS 的配置信息,需要加入以下代码:
// The path argument is the location of the configuration file.
conf.addResource(new Path("/xxxx/hdfs-site.xml"));
// Path built from the HADOOP_CONF environment variable.
conf.addResource(new Path(System.getenv("HADOOP_CONF") + "/hdfs-site.xml"));
                    
                     
                    
                 
                    
                
 
                
            
         
         浙公网安备 33010602011771号
浙公网安备 33010602011771号