Notes left over from an HDFS project I wrote back in 2018: basic HDFS operations in Scala.

package info.aoye.hadoop

import java.io.ByteArrayInputStream
import java.net.URI

import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.{FileSystem, Path}
import org.apache.hadoop.io.IOUtils // Hadoop's own IOUtils, rather than the ZooKeeper-internal one
import org.apache.commons.lang.StringUtils

import scala.collection.mutable.ListBuffer


/**
  * @author duchaoqun
  * @since 2018-08-09
  */
object DemoHdfs1 extends scala.App {
  val hdfsPath = "hdfs://172.17.2.135:8020/user/"

  /**
    * Create a new directory on HDFS.
    *
    * @param dir directory name
    * @return true on success, false otherwise
    */
  def createDir(dir: String): Boolean = {
    val dirPath = hdfsPath + dir // Note the concatenation: if the base URL has no trailing slash, the directory lands one level up.

    // Validate the input (dirPath itself is never blank, since hdfsPath is a constant).
    if (StringUtils.isBlank(dir)) {
      false
    } else {
      val fileSystem = FileSystem.get(URI.create(dirPath), new Configuration(), "hdfs")
      // mkdirs returns false on failure, so hand its result back to the caller.
      val created = fileSystem.exists(new Path(dirPath)) || fileSystem.mkdirs(new Path(dirPath))
      fileSystem.close()
      created
    }
  }
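
  // Hypothetical usage (the directory name is illustrative, not from the original notes):
  //createDir("test_dir")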


  /**
    * Delete a directory on HDFS.
    *
    * @param dir dir name
    * @return true or false
    */
  def deleteDir(dir: String): Boolean = {
    val dirPath = hdfsPath + dir // Same caveat as above: without a trailing slash on the base URL, this resolves one level up.
    if (StringUtils.isBlank(dir)) {
      false
    } else {
      val fileSystem = FileSystem.get(URI.create(dirPath), new Configuration(), "hdfs")
      // deleteOnExit only performs the delete when the FileSystem is closed (which happens
      // on the next line); fileSystem.delete(new Path(dirPath), true) would delete immediately.
      fileSystem.deleteOnExit(new Path(dirPath))
      fileSystem.close()
      true
    }
  }
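
  // Hypothetical usage (the directory name is illustrative, not from the original notes):
  //deleteDir("test_dir")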

  /**
    * Delete a file on HDFS.
    *
    * @param filePath absolute path of the file on HDFS
    * @return true or false
    */
  def deleteFile(filePath: String): Boolean = {
    if (StringUtils.isBlank(filePath)) {
      false
    } else {
      val fileSystem = FileSystem.get(URI.create(filePath), new Configuration(), "hdfs")
      fileSystem.deleteOnExit(new Path(filePath))
      fileSystem.close()
      true
    }
  }

  //deleteFile("hdfs://172.17.2.135:8020/user/test2.txt")

  /**
    * List all entries under a directory on HDFS.
    *
    * @param dirPath directory path on HDFS
    * @return Some(list of entry paths), or None for a blank input
    */
  def listAll(dirPath: String): Option[List[String]] = {
    if (StringUtils.isBlank(dirPath)) {
      None
    } else {
      val fileSystem = FileSystem.get(URI.create(dirPath), new Configuration(), "hdfs")
      val fileStatus = fileSystem.listStatus(new Path(dirPath))
      val statusListBuffer = new ListBuffer[String]
      fileStatus.foreach(x => statusListBuffer += x.getPath.toString)
      //fileStatus.foreach(x => println(x.getPath.toString))
      fileSystem.close() // Release the handle, as the other helpers do.
      Some(statusListBuffer.toList)
    }
  }

  listAll("hdfs://172.17.2.135:8020/user/") match {
    case Some(s) => s.foreach(println(_))
    case None => println("Get Nothing!")
  }


  /**
    * Upload a local file to HDFS.
    *
    * @param localFile local file (including its absolute path)
    * @param hdfsFile  name to save as on HDFS
    * @param hdfsPath  directory to save into on HDFS
    * @return true or false
    */
  def uploadLocalToHDFS(localFile: String, hdfsFile: String, hdfsPath: String): Boolean = {
    // Reject the call if any single argument is blank.
    if (StringUtils.isBlank(localFile) || StringUtils.isBlank(hdfsFile) || StringUtils.isBlank(hdfsPath)) {
      false
    } else {
      val fileSystem = FileSystem.get(URI.create(hdfsPath), new Configuration(), "hdfs")
      fileSystem.copyFromLocalFile(new Path(localFile), new Path(hdfsPath + hdfsFile))
      fileSystem.close()
      true
    }
  }

  //uploadLocalToHDFS("C:\\Users\\ducha\\Documents\\test1.txt", "test1.txt", "hdfs://172.17.2.135:8020/user/")


  /**
    * Create a new file on HDFS.
    *
    * @param fileName    file name
    * @param fileContent file content
    * @param hdfsPath    directory on HDFS
    * @return true or false
    */
  def createFile(fileName: String, fileContent: String, hdfsPath: String): Boolean = {
    // Reject the call if either argument is blank.
    if (StringUtils.isBlank(fileName) || StringUtils.isBlank(hdfsPath)) {
      false
    } else {
      val fileSystem = FileSystem.get(URI.create(hdfsPath), new Configuration(), "hdfs")
      val fsDataOutputStream = fileSystem.create(new Path(hdfsPath + fileName))
      fsDataOutputStream.write(fileContent.getBytes("UTF-8"))
      fsDataOutputStream.close()
      fileSystem.close()
      true
    }
  }

  //createFile("test3.txt", "测试数据!!", "hdfs://172.17.2.135:8020/user/")

  /**
    * Read the content of a file on HDFS.
    *
    * @param filePath file path on HDFS
    * @return Some(Array[Byte]), or None if the input is blank or the file is missing
    */
  def readFile(filePath: String): Option[Array[Byte]] = {
    if (StringUtils.isBlank(filePath)) {
      None
    } else {
      val fileSystem = FileSystem.get(URI.create(filePath), new Configuration(), "hdfs")
      val file = new Path(filePath)
      if (fileSystem.exists(file)) {
        val fSDataInputStream = fileSystem.open(file)
        val fileStatus = fileSystem.getFileStatus(file)
        // getLen returns a Long; toInt truncates files over 2 GB, so this only suits small files.
        val bufferedByte = new Array[Byte](fileStatus.getLen.toInt)

        fSDataInputStream.readFully(0, bufferedByte)
        fSDataInputStream.close()
        fileSystem.close()
        Some(bufferedByte) // TODO: a raw Byte array is returned; decoding/wrapping could still be improved.
      } else {
        fileSystem.close()
        None // No such file; keep the Option contract instead of throwing.
      }
    }
  }

  //println(new String(readFile("hdfs://172.17.2.135:8020/user/test3.txt").get, "UTF-8"))


  /**
    * Append content to an existing file on HDFS.
    *
    * @param filePath file path
    * @param content  content to append
    * @return true or false
    */
  def append(filePath: String, content: String): Boolean = {
    // `return` is required here: a bare `if (...) { false }` is an expression whose value is silently discarded.
    if (StringUtils.isBlank(filePath)) return false
    if (StringUtils.isEmpty(content)) return true // Nothing to append.

    val configuration = new Configuration()
    // These two settings let append work on clusters with very few datanodes (e.g. a
    // single-node dev setup), where replace-datanode-on-failure would otherwise fail the pipeline.
    configuration.set("dfs.client.block.write.replace-datanode-on-failure.policy", "NEVER")
    configuration.set("dfs.client.block.write.replace-datanode-on-failure.enable", "true")
    val fileSystem = FileSystem.get(URI.create(filePath), configuration, "hdfs")
    if (fileSystem.exists(new Path(filePath))) {
      try {
        val inputStream = new ByteArrayInputStream(content.getBytes("UTF-8"))
        val fsDataOutputStream = fileSystem.append(new Path(filePath))
        // copyBytes with close = true closes both streams itself, so no extra close() calls are needed.
        IOUtils.copyBytes(inputStream, fsDataOutputStream, 4096, true)
        fileSystem.close()
        true
      } catch {
        case e: Exception =>
          e.printStackTrace()
          fileSystem.close()
          false
      }
    } else {
      // TODO: the file does not exist yet; fall back to createFile().
      fileSystem.close()
      false
    }
  }
  //append("hdfs://172.17.2.135:8020/user/test3.txt","Dura!")

  // TODO: FileSystem also honors the HADOOP_USER_NAME environment variable (set it to "hdfs") as an alternative to passing the user explicitly.
}
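
For reference, a minimal sbt dependency sketch that would build this code. The exact versions are assumptions on my part (the cluster dates from 2018, so any Hadoop 2.x client in that range should behave the same):

libraryDependencies ++= Seq(
  "org.apache.hadoop" % "hadoop-client" % "2.7.3", // FileSystem, Path, Configuration, IOUtils
  "commons-lang"      % "commons-lang"  % "2.6"    // org.apache.commons.lang.StringUtils
)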
