使用python对hdfs进行操作

Python对hdfs进行操作需要用到hdfs库,安装方法如下:

pip install --user hdfs

使用 Python 操作 HDFS 的代码如下:

# -*- coding: utf-8 -*-

# @Time    : 2020/3/7 18:43
# @Author  : focksor
# @Email   : focksor@outlook.com
import os

from hdfs import Client


if __name__ == '__main__':
    # Demo: create a directory, upload, rename, download and delete a file
    # on HDFS through the `hdfs` library's Client, printing a listing after
    # each step so the effect of every call is visible.
    hdfs_ip = "192.168.232.150"
    hdfs_version = 3
    hdfs_root = "/test"
    filepath = r"C:\Users\focks\OneDrive\DevEm\VMware\iso\ubuntu-18.04.1-desktop-amd64.iso"

    # Default NameNode HTTP port: 9870 on Hadoop 3.x, 50070 on Hadoop 2.x.
    # (The previous fallback value 90070 exceeds the maximum TCP port 65535,
    # so it could never produce a reachable address.)
    hdfs_port = 9870 if hdfs_version == 3 else 50070
    hdfs_addr = "http://" + hdfs_ip + ":" + str(hdfs_port)

    client = Client(hdfs_addr)

    # Create the working directory, then list the HDFS root to confirm it.
    print("创建文件夹")
    client.makedirs(hdfs_root)
    print(client.list("/"))

    # Upload the local file into the working directory.
    print("上传文件")
    client.upload(hdfs_root, filepath)
    print(client.list(hdfs_root))

    # Rename the uploaded file to a shorter name.
    print("修改文件名")
    client.rename(hdfs_root + "/ubuntu-18.04.1-desktop-amd64.iso", hdfs_root + "/ubuntu.iso")
    print(client.list(hdfs_root))

    # Download the renamed file into the current local directory.
    print("下载文件")
    client.download(hdfs_root + "/ubuntu.iso", ".")
    print(os.listdir("."))

    # Remove the file from HDFS and show the directory contents afterwards.
    print("删除文件")
    client.delete(hdfs_root + "/ubuntu.iso")
    print(client.list(hdfs_root))


posted @ 2020-03-07 21:49  focksor  阅读(358)  评论(0)  编辑  收藏