HDFS Operations (repost)

 

https://blog.csdn.net/baidu_28997655/article/details/81663668

I. Command-line operations

1. List directory contents:

[root@hadoop001 hadoop-2.6.5]# hadoop fs -ls /

[root@hadoop001 hadoop-2.6.5]# hadoop fs -ls /
Found 2 items
drwx------   - root supergroup          0 2018-08-13 19:06 /tmp
drwxr-xr-x   - root supergroup          0 2018-08-13 19:03 /user

2. List directories recursively:

[root@hadoop001 hadoop-2.6.5]# hadoop fs -ls -R /

3. Create nested directories:

[root@hadoop001 hadoop-2.6.5]# hadoop fs -mkdir -p /aaa/bbb/cc/dd

[root@hadoop001 hadoop-2.6.5]# hadoop fs  -mkdir  -p  /aaa/bbb/cc/dd
[root@hadoop001 hadoop-2.6.5]# hadoop fs -ls -R /
drwxr-xr-x   - root supergroup          0 2018-08-13 19:54 /aaa
drwxr-xr-x   - root supergroup          0 2018-08-13 19:54 /aaa/bbb
drwxr-xr-x   - root supergroup          0 2018-08-13 19:54 /aaa/bbb/cc
drwxr-xr-x   - root supergroup          0 2018-08-13 19:54 /aaa/bbb/cc/dd

4. Move (cut) a file from the local filesystem to HDFS

[root@hadoop001 hadoop-2.6.5]# hadoop fs -moveFromLocal input/log.txt /user/data/

[root@hadoop001 hadoop-2.6.5]# hadoop fs -ls -R /
drwxr-xr-x   - root supergroup          0 2018-08-13 19:03 /user
drwxr-xr-x   - root supergroup          0 2018-08-13 19:58 /user/data
[root@hadoop001 hadoop-2.6.5]# hadoop  fs  -moveFromLocal  input/log.txt  /user/data/
[root@hadoop001 hadoop-2.6.5]# hadoop fs -ls -R /
drwxr-xr-x   - root supergroup          0 2018-08-13 19:03 /user
drwxr-xr-x   - root supergroup          0 2018-08-13 19:59 /user/data
-rw-r--r--   3 root supergroup      39654 2018-08-13 19:59 /user/data/log.txt

5. Copy a file from the local filesystem to HDFS

[root@hadoop001 hadoop-2.6.5]# hadoop fs -copyFromLocal input/phone_data.txt /user/data/

[root@hadoop001 hadoop-2.6.5]# hadoop fs -copyFromLocal input/phone_data.txt /user/data/
[root@hadoop001 hadoop-2.6.5]# hadoop fs -ls -R /
drwxr-xr-x   - root supergroup          0 2018-08-13 19:03 /user
drwxr-xr-x   - root supergroup          0 2018-08-13 20:02 /user/data
-rw-r--r--   3 root supergroup      39654 2018-08-13 19:59 /user/data/log.txt
-rw-r--r--   3 root supergroup       1429 2018-08-13 20:02 /user/data/phone_data.txt

-put does the same thing:

[root@hadoop001 hadoop-2.6.5]# hadoop fs -put input/phone_data.txt /user/data/

[root@hadoop001 hadoop-2.6.5]# hadoop fs -ls -R /
drwxr-xr-x   - root supergroup          0 2018-08-13 19:03 /user
drwxr-xr-x   - root supergroup          0 2018-08-13 20:03 /user/data
-rw-r--r--   3 root supergroup      39654 2018-08-13 19:59 /user/data/log.txt
[root@hadoop001 hadoop-2.6.5]# hadoop fs -put input/phone_data.txt /user/data/
[root@hadoop001 hadoop-2.6.5]# hadoop fs -ls -R /
drwxr-xr-x   - root supergroup          0 2018-08-13 19:03 /user
drwxr-xr-x   - root supergroup          0 2018-08-13 20:04 /user/data
-rw-r--r--   3 root supergroup      39654 2018-08-13 19:59 /user/data/log.txt
-rw-r--r--   3 root supergroup       1429 2018-08-13 20:04 /user/data/phone_data.txt

6. Move a file from HDFS to the local filesystem

hadoop fs -moveToLocal /aaa/bbb/cc/dd /home/hadoop/a.txt

This command cannot be used in hadoop-2.6.5; -moveToLocal is not implemented there. A workaround is sketched below.
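
A minimal workaround sketch (my own addition, not from the original post): copy the data to the local filesystem with -get and then delete it from HDFS, which together behave like a move. The paths mirror the example above and are only illustrative.

hadoop fs -get /aaa/bbb/cc/dd /home/hadoop/a.txt
hadoop fs -rm -r /aaa/bbb/cc/dd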

7. Copy a file from HDFS to the local filesystem

[root@hadoop001 hadoop-2.6.5]# hadoop fs -get /user/data/log.txt input/

[root@hadoop001 hadoop-2.6.5]# hadoop fs -get /user/data/log.txt input/
[root@hadoop001 hadoop-2.6.5]# hadoop fs -ls -R /
drwxr-xr-x   - root supergroup          0 2018-08-13 19:03 /user
drwxr-xr-x   - root supergroup          0 2018-08-13 20:04 /user/data
-rw-r--r--   3 root supergroup      39654 2018-08-13 19:59 /user/data/log.txt
-rw-r--r--   3 root supergroup       1429 2018-08-13 20:04 /user/data/phone_data.txt
[root@hadoop001 hadoop-2.6.5]# ll input/
total 52
-rw-r--r--. 1 root root 39654 Aug 13 20:06 log.txt

-copyToLocal behaves like -get:

[root@hadoop001 hadoop-2.6.5]# hadoop fs -copyToLocal /user/data/phone_data.txt input/

[root@hadoop001 hadoop-2.6.5]# ll input/
total 48
-rw-r--r--. 1 root root 39654 Aug 13 20:06 log.txt
-rwxrw-rw-. 1 root root   116 Aug 13 19:10 part-r-00000
-rw-r--r--. 1 root root    53 Aug 13 01:27 xiaoxiao.txt
[root@hadoop001 hadoop-2.6.5]# hadoop fs -copyToLocal /user/data/phone_data.txt input/
[root@hadoop001 hadoop-2.6.5]# ll input/
total 52
-rw-r--r--. 1 root root 39654 Aug 13 20:06 log.txt
-rwxrw-rw-. 1 root root   116 Aug 13 19:10 part-r-00000
-rw-r--r--. 1 root root  1429 Aug 13 20:12 phone_data.txt
-rw-r--r--. 1 root root    53 Aug 13 01:27 xiaoxiao.txt

8. Append a local file to the end of a file in HDFS

[root@hadoop001 hadoop-2.6.5]# hadoop fs -appendToFile input/xiaoxiao.txt /user/data/xiaoxiao.txt

[root@hadoop001 hadoop-2.6.5]# hadoop fs -appendToFile input/xiaoxiao.txt /user/data/xiaoxiao.txt
[root@hadoop001 hadoop-2.6.5]# hadoop fs -cat /user/data/xiaoxiao.txt
hello world sb
nimabi s hello      // the same lines were appended a second time
hhs
 shh
ssh
hhs
world
hello world sb
nimabi s hello
hhs
 shh
ssh
hhs
world

9. Display file contents

[root@hadoop001 hadoop-2.6.5]# hadoop fs -cat /user/data/xiaoxiao.txt

[root@hadoop001 hadoop-2.6.5]# hadoop fs -cat /user/data/xiaoxiao.txt
hello world sb
nimabi s hello
hhs
 shh
ssh
hhs
world
hello world sb
nimabi s hello
hhs
 shh
ssh
hhs
world

10. Display the end of a file

[root@hadoop001 hadoop-2.6.5]# hadoop fs -tail /user/data/xiaoxiao.txt

-tail prints the last kilobyte of the file; when the file is smaller than that, the entire content is displayed, as in the output below.

[root@hadoop001 hadoop-2.6.5]# hadoop fs -tail /user/data/xiaoxiao.txt
hello world sb
nimabi s hello
hhs
 shh
ssh
hhs
world
hello world sb
nimabi s hello
hhs
 shh
ssh
hhs
world
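
As a side note (my own addition, not in the original post), -tail also accepts -f to keep printing data as it is appended to the file, much like the Linux tail -f:

hadoop fs -tail -f /user/data/xiaoxiao.txt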

11. Change file permissions

hadoop fs -chmod 777 /user/data/xiaoxiao.txt

[root@hadoop001 hadoop-2.6.5]# hadoop fs -chmod 777 /user/data/xiaoxiao.txt
[root@hadoop001 hadoop-2.6.5]# hadoop fs -ls -R /
drwxr-xr-x   - root supergroup          0 2018-08-13 19:03 /user
drwxr-xr-x   - root supergroup          0 2018-08-13 20:15 /user/data
-rw-r--r--   3 root supergroup      39654 2018-08-13 19:59 /user/data/log.txt
-rw-r--r--   3 root supergroup       1429 2018-08-13 20:04 /user/data/phone_data.txt
# this file's permissions have been changed
-rwxrwxrwx   3 root supergroup        106 2018-08-13 20:15 /user/data/xiaoxiao.txt

12. Change file owner and group

[root@hadoop001 hadoop-2.6.5]# hadoop fs -chown j:j /user/data/xiaoxiao.txt

[root@hadoop001 hadoop-2.6.5]# hadoop fs -chown j:j /user/data/xiaoxiao.txt
[root@hadoop001 hadoop-2.6.5]# hadoop fs -ls -R /
drwxr-xr-x   - root supergroup          0 2018-08-13 19:03 /user
drwxr-xr-x   - root supergroup          0 2018-08-13 20:15 /user/data
-rw-r--r--   3 root supergroup      39654 2018-08-13 19:59 /user/data/log.txt
-rw-r--r--   3 root supergroup       1429 2018-08-13 20:04 /user/data/phone_data.txt
-rwxrwxrwx   3 j    j                 106 2018-08-13 20:15 /user/data/xiaoxiao.txt

13. Copy from one HDFS path to another

[root@hadoop001 hadoop-2.6.5]# hadoop fs -cp /user/data/log.txt /user/log.txt

[root@hadoop001 hadoop-2.6.5]# hadoop fs -cp /user/data/log.txt /user/log.txt
[root@hadoop001 hadoop-2.6.5]# hadoop fs -ls -R /
drwxr-xr-x   - root supergroup          0 2018-08-13 20:32 /user
drwxr-xr-x   - root supergroup          0 2018-08-13 20:15 /user/data
-rw-r--r--   3 root supergroup      39654 2018-08-13 19:59 /user/data/log.txt
-rw-r--r--   3 root supergroup       1429 2018-08-13 20:04 /user/data/phone_data.txt
-rwxrwxrwx   3 j    j                 106 2018-08-13 20:15 /user/data/xiaoxiao.txt
-rw-r--r--   3 root supergroup      39654 2018-08-13 20:32 /user/log.txt

14. Move a file within HDFS

[root@hadoop001 hadoop-2.6.5]# hadoop fs -mv /user/log.txt /

[root@hadoop001 hadoop-2.6.5]# hadoop  fs  -mv  /user/log.txt  /
[root@hadoop001 hadoop-2.6.5]# hadoop fs -ls -R /
-rw-r--r--   3 root supergroup      39654 2018-08-13 20:32 /log.txt

15. Delete a file or directory

[root@hadoop001 hadoop-2.6.5]# hadoop fs -rm -r /log.txt

[root@hadoop001 hadoop-2.6.5]# hadoop fs -rm -r /log.txt
18/08/13 20:37:05 INFO fs.TrashPolicyDefault: Namenode trash configuration: Deletion interval = 0 minutes, Emptier interval = 0 minutes.
Deleted /log.txt
[root@hadoop001 hadoop-2.6.5]# hadoop fs -ls -R /
drwxr-xr-x   - root supergroup          0 2018-08-13 20:34 /user
drwxr-xr-x   - root supergroup          0 2018-08-13 20:15 /user/data
-rw-r--r--   3 root supergroup      39654 2018-08-13 19:59 /user/data/log.txt
-rw-r--r--   3 root supergroup       1429 2018-08-13 20:04 /user/data/phone_data.txt
-rwxrwxrwx   3 j    j                 106 2018-08-13 20:15 /user/data/xiaoxiao.txt

16. Delete an empty directory

[root@hadoop001 hadoop-2.6.5]# hadoop fs -rmdir /aaa/bbb/ccc

[root@hadoop001 hadoop-2.6.5]# hadoop fs -ls -R /
drwxr-xr-x   - root supergroup          0 2018-08-13 20:39 /aaa
drwxr-xr-x   - root supergroup          0 2018-08-13 20:39 /aaa/bbb
drwxr-xr-x   - root supergroup          0 2018-08-13 20:39 /aaa/bbb/ccc
[root@hadoop001 hadoop-2.6.5]# hadoop  fs  -rmdir   /aaa/bbb/ccc
[root@hadoop001 hadoop-2.6.5]# hadoop fs -ls -R /
drwxr-xr-x   - root supergroup          0 2018-08-13 20:39 /aaa
drwxr-xr-x   - root supergroup          0 2018-08-13 20:40 /aaa/bbb

17. Show the file system's free-space statistics

[root@hadoop001 hadoop-2.6.5]# hadoop fs -df -h /

[root@hadoop001 hadoop-2.6.5]# hadoop  fs  -df  -h  /
Filesystem               Size     Used  Available  Use%
hdfs://hadoop001:8020  53.1 G  237.9 K     26.0 G    0%

18. Show the size of a directory

[root@hadoop001 hadoop-2.6.5]# hadoop fs -du -s -h /user

[root@hadoop001 hadoop-2.6.5]# hadoop  fs  -du  -s  -h /user
40.2 K  /user

19. Count the directories, files, and bytes under a given path

[root@hadoop001 hadoop-2.6.5]# hadoop fs -count /user/
# columns: directory count (2: /user and /user/data), file count (3), total size in bytes (41189), path
           2            3              41189 /user
[root@hadoop001 hadoop-2.6.5]# hadoop fs -ls -R /
drwxr-xr-x   - root supergroup          0 2018-08-13 20:39 /aaa
drwxr-xr-x   - root supergroup          0 2018-08-13 20:40 /aaa/bbb
drwxr-xr-x   - root supergroup          0 2018-08-13 20:34 /user
drwxr-xr-x   - root supergroup          0 2018-08-13 20:15 /user/data
-rw-r--r--   3 root supergroup      39654 2018-08-13 19:59 /user/data/log.txt
-rw-r--r--   3 root supergroup       1429 2018-08-13 20:04 /user/data/phone_data.txt
-rwxrwxrwx   3 j    j                 106 2018-08-13 20:15 /user/data/xiaoxiao.txt

II. Client API operations

Create a new Java project in IDEA. As for the dependencies, many people online simply copy the JARs from the share directory of hadoop.tar.gz and add them to the project.
I instead declared the Hadoop dependencies with Maven (e.g. the org.apache.hadoop:hadoop-client artifact matching the cluster version), packaged the project as a WAR, and after unpacking it the Hadoop dependency JARs can be found in its lib directory.

1. Upload a file

        // Get configuration
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "hdfs://hadoop001:8020");   // note the key is fs.defaultFS, not fs.defaultFs

        // Get the file system, connecting as user "root"
        FileSystem fileSystem = FileSystem.get(new URI("hdfs://hadoop001:8020"), conf, "root");

        System.out.println(fileSystem);

        // Copy a local file up to HDFS
        fileSystem.copyFromLocalFile(new Path("C:/usr/local/image/wardrobe/1.jpg"), new Path("/user/data/image/2.jpg"));

        // Close the file system
        fileSystem.close();

2. Upload a file, optionally deleting the source

    /**
     * File upload
     *
     * @throws Exception
     */
    @Test
    public void putFileToHDFS() throws Exception {
        // Get the file system
        Configuration conf = new Configuration();
        FileSystem fileSystem = FileSystem.get(new URI("hdfs://hadoop001:8020"), conf, "root");

        // The first argument (delSrc) controls whether the local source file is deleted after the upload;
        // here it is false, so the local copy is kept -- pass true to actually move the file.
        fileSystem.copyFromLocalFile(false, new Path("C:\\Users\\JessXie\\Downloads\\hadoop-2.6.5.tar.gz"), new Path("/user/data/image/"));
        fileSystem.close();
    }

3. Download a file

    /**
     * File download
     *
     * @throws Exception
     */
    @Test
    public void getFileToHDFS() throws Exception {
        // Get the file system
        Configuration conf = new Configuration();
        // Because the Hadoop cluster runs on remote VMs rather than locally, use the
        // FileSystem.get(java.net.URI uri, Configuration conf, String user) overload
        // and pass the remote user name as the third argument.
        FileSystem fileSystem = FileSystem.get(new URI("hdfs://hadoop001:8020"), conf, "root");

        // copyToLocalFile(delSrc, src, dst, useRawLocalFileSystem):
        // delSrc = true deletes the HDFS source after the copy;
        // useRawLocalFileSystem = true avoids writing a local .crc checksum file.
        fileSystem.copyToLocalFile(true, new Path("/user/data/image/4.jpg"), new Path("C:\\Users\\JessXie\\Downloads\\4.jpg"), true);
        fileSystem.close();
    }

4. Create a directory

    /**
     * Create a directory
     */
    @Test
    public void mkdirAtHDFS() throws Exception {
        // Get the file system
        Configuration conf = new Configuration();
        FileSystem fileSystem = FileSystem.get(new URI("hdfs://hadoop001:8020"), conf, "root");
        // mkdirs creates the whole path, so it works for single- and multi-level directories
        fileSystem.mkdirs(new Path("/user/data/image/2018"));
        fileSystem.close();
    }

5. Delete a file

    /**
     * Delete a file
     */
    @Test
    public void rmdirAtHDFS() throws Exception {
        // Get the file system
        Configuration conf = new Configuration();
        FileSystem fileSystem = FileSystem.get(new URI("hdfs://hadoop001:8020"), conf, "root");
        // the second argument enables recursive deletion (required when the path is a non-empty directory)
        fileSystem.delete(new Path("/user/data/image/4.jpg"), true);
        fileSystem.close();
    }

6. Rename a file

    /**
     * Rename a file
     */
    @Test
    public void renameAtHDFS() throws Exception {
        // Get the file system
        Configuration conf = new Configuration();
        FileSystem fileSystem = FileSystem.get(new URI("hdfs://hadoop001:8020"), conf, "root");
        // rename returns true on success
        boolean valid = fileSystem.rename(new Path("/user/data/image/4.jpg"), new Path("/user/data/image/2.jpg"));
        System.out.println(valid ? "rename succeeded" : "rename failed");
        fileSystem.close();
    }

7. View file details

    /**
     * View file details
     */
    @Test
    public void readFileAtHDFS() throws Exception {
        // Get the file system
        Configuration conf = new Configuration();
        FileSystem fileSystem = FileSystem.get(new URI("hdfs://hadoop001:8020"), conf, "root");
        // listFiles(path, true) recursively returns every file under the path
        RemoteIterator<LocatedFileStatus> valid = fileSystem.listFiles(new Path("/"), true);
        while (valid.hasNext()) {
            // Fetch the next file's status
            LocatedFileStatus status = valid.next();
            // Print the file details
            System.out.println(status.getPath().getName());
            System.out.println(status.getBlockSize());
            System.out.println(status.getLen());
            System.out.println(status.getPermission());
            // Get the file's block locations
            BlockLocation[] blockLocations = status.getBlockLocations();
            for (BlockLocation blockLocation : blockLocations) {
                System.out.println("block offset:" + blockLocation.getOffset());
                String[] hosts = blockLocation.getHosts();
                for (String host : hosts) {
                    System.out.println("host:" + host);
                }
            }

            System.out.println("----------------------------");
        }
        fileSystem.close();
    }

8. List files and directories

    /**
     * List files and directories
     */
    @Test
    public void readFolderAtHDFS() throws Exception {
        // Get the file system
        Configuration conf = new Configuration();
        FileSystem fileSystem = FileSystem.get(new URI("hdfs://hadoop001:8020"), conf, "root");
        // listStatus returns the direct children (files and directories) of the path
        FileStatus[] valid = fileSystem.listStatus(new Path("/user/data/image/"));
        for (FileStatus fileStatus : valid) {
            if (fileStatus.isDirectory()) {
                System.out.println("d---" + fileStatus.getPath().getName());
            } else {
                System.out.println("f---" + fileStatus.getPath().getName());
            }
        }
        fileSystem.close();
    }

III. Client I/O stream operations

1. Upload a file with streams

    /**
     * Stream-based file upload
     *
     * @throws Exception
     */
    @Test
    public void putFileToHDFS() throws Exception {
        // Get the file system
        Configuration conf = new Configuration();
        FileSystem fileSystem = FileSystem.get(new URI("hdfs://hadoop001:8020"), conf, "root");
        // Output stream to the target HDFS file
        FSDataOutputStream fos = fileSystem.create(new Path("/user/data/input/hadoop-2.6.5.tar.gz"));

        // Input stream from the local file
        FileInputStream fis = new FileInputStream(new File("C:\\hadoop-2.6.5.tar.gz"));

        // Pipe the streams together
        try {
            IOUtils.copyBytes(fis, fos, conf);
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            IOUtils.closeStream(fis);
            IOUtils.closeStream(fos);
        }
        fileSystem.close();
    }

2. Download a file with streams

    /**
     * Stream-based file download
     *
     * @throws Exception
     */
    @Test
    public void getFileFromHDFS() throws Exception {
        // Get the file system
        Configuration conf = new Configuration();
        FileSystem fileSystem = FileSystem.get(new URI("hdfs://hadoop001:8020"), conf, "root");
        // Input stream from the HDFS file
        FSDataInputStream fis = fileSystem.open(new Path("/user/data/hadoop-2.6.5.tar.gz"));

        // Output stream to the local file
        FileOutputStream fos = new FileOutputStream(new File("C:\\hadoop-2.6.5.tar.gz"));

        // Pipe the streams together
        try {
            IOUtils.copyBytes(fis, fos, conf);
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            IOUtils.closeStream(fis);
            IOUtils.closeStream(fos);
        }
        fileSystem.close();
    }

3. Download a large file block by block (seek)

    /**
     * Download the first block of a large file
     * @throws Exception
     */
    @Test
    public void getFileFromHDFSSeek1() throws Exception {
        // Get the file system
        Configuration conf = new Configuration();
        FileSystem fileSystem = FileSystem.get(new URI("hdfs://hadoop001:8020"), conf, "root");
        // Input stream from the HDFS file
        FSDataInputStream fis = fileSystem.open(new Path("/user/data/image/hadoop-2.6.5.tar.gz"));

        // Output stream to the local part file
        FileOutputStream fos = new FileOutputStream(new File("C:\\hadoop-2.6.5.tar.gz.part1"));
        // Copy only the first 128 MB (the default HDFS block size)
        byte[] buf = new byte[1024];
        long remaining = 1024L * 1024 * 128;
        while (remaining > 0) {
            int bytesRead = fis.read(buf, 0, (int) Math.min(buf.length, remaining));
            if (bytesRead == -1) {
                break;   // end of file reached before 128 MB
            }
            fos.write(buf, 0, bytesRead);
            remaining -= bytesRead;
        }

        // Close the streams
        try {
            IOUtils.closeStream(fis);
            IOUtils.closeStream(fos);
        } catch (Exception e) {
            e.printStackTrace();
        }
        fileSystem.close();
    }

    /**
     * Download the remainder of a large file (everything after the first block)
     * @throws Exception
     */
    @Test
    public void getFileFromHDFSSeek2() throws Exception {
        // Get the file system
        Configuration conf = new Configuration();
        FileSystem fileSystem = FileSystem.get(new URI("hdfs://hadoop001:8020"), conf, "root");
        // Input stream from the HDFS file
        FSDataInputStream fis = fileSystem.open(new Path("/user/data/image/hadoop-2.6.5.tar.gz"));

        // Output stream to the local part file
        FileOutputStream fos = new FileOutputStream(new File("C:\\hadoop-2.6.5.tar.gz.part2"));
        // Skip the first 128 MB and copy the rest
        fis.seek(1024 * 1024 * 128);
        try {
            IOUtils.copyBytes(fis, fos, conf);
        } catch (Exception e){
            e.printStackTrace();
        } finally {
            IOUtils.closeStream(fis);
            IOUtils.closeStream(fos);
        }
        fileSystem.close();

    }
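
To verify the split download, the two parts can be concatenated back into the original archive. This step is my own addition and assumes the part files are accessible from a Unix-like shell (on Windows, cmd's type command can be used the same way); the paths simply reuse the file names from the code above:

cat hadoop-2.6.5.tar.gz.part2 >> hadoop-2.6.5.tar.gz.part1
mv hadoop-2.6.5.tar.gz.part1 hadoop-2.6.5.tar.gz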