
Hadoop Development with Eclipse ----- Learning the HDFS API

Environment: Ubuntu 8.04.4        Hadoop 1.0.2       Eclipse 3.7.2

Overview: This post records how to use the common HDFS APIs, such as uploading a file, creating a file, renaming, deleting, and reading file contents.

 

I. Experimental Steps

1. Start Hadoop and switch to the superuser
       gqy@localhost:/usr/local/hadoop-1.0.2$ su
       root@localhost:/usr/local/hadoop-1.0.2# bin/hadoop namenode -format
       root@localhost:/usr/local/hadoop-1.0.2# bin/start-all.sh

    Note that namenode -format only needs to be run when the cluster is first initialized; reformatting an existing NameNode wipes the data already stored in HDFS.

2. Open Eclipse and create a new project.
    File-->New-->Other-->Map/Reduce Project

3. Create a new class and type in the code below. To run it, click Run On Hadoop and watch the output in the console. You can also check the contents of HDFS with shell commands in a terminal, or right-click DFS in Eclipse and choose Disconnect, which refreshes the file system view.
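All of the snippets below are methods of a single class, which the main method at the end calls HadoopFileOperate. As a minimal sketch (the class name comes from the post itself; the imports are the standard Hadoop 1.x classes the snippets use), the surrounding skeleton would look like:

    import java.io.FileInputStream;
    import java.io.IOException;

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FSDataInputStream;
    import org.apache.hadoop.fs.FSDataOutputStream;
    import org.apache.hadoop.fs.FileStatus;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;

    public class HadoopFileOperate
    {
        // the methods shown below go here
    }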

   

Uploading a file
    /*
     * upload a file from the local file system to HDFS
     * */
    public void uploadLocalfileToHdfs(String src, String dst) throws IOException
    {
                
        Configuration conf = new Configuration();
        FileSystem hdfs = FileSystem.get(conf);
        
        Path srcPath = new Path(src);
        Path dstPath = new Path(dst);
        
        hdfs.copyFromLocalFile(srcPath, dstPath);
        
        //print
        System.out.println("Upload to "+conf.get("fs.default.name"));
        
        System.out.println("------------list files------------"+"\n");
        FileStatus [] fileStatus = hdfs.listStatus(dstPath);
        for (FileStatus file : fileStatus) 
        {
            System.out.println(file.getPath());
        }
        
        hdfs.close();
    }
Creating a file
    /*
     * create a new file in HDFS
     * */
    public void createNewHdfsFile(String dst, byte[] content) throws IOException
    {
        Configuration conf = new Configuration();
        FileSystem hdfs = FileSystem.get(conf);
        
        Path dstPath = new Path(dst);
        FSDataOutputStream outputStream = hdfs.create(dstPath);
        
        outputStream.write(content);
        
        outputStream.close();
        hdfs.close();
        
        System.out.println("success, create a new file in HDFS:  "+dst);
        
    }
Renaming a file
    /*
     * rename a file in HDFS
     * */
    public void renameHdfsFile(String src,String newName) throws IOException
    {
        Configuration conf = new Configuration();
        FileSystem hdfs = FileSystem.get(conf);
        
        Path srcPath = new Path(src);        
        Path dstPath = new Path(newName);
        
        if (hdfs.rename(srcPath, dstPath))
        {
            System.out.println("ok, file: "+src+" renamed to: "+newName);            
        }    
        else
        {
            System.out.println("error, file: "+src+"  rename failed!");
        }
        
        hdfs.close();
    }
Deleting a file
    /*
     * delete an HDFS file
     * */
    public void deleteHdfsFile(String src) throws IOException
    {
        Configuration conf = new Configuration();
        FileSystem hdfs = FileSystem.get(conf);
        
        Path srcPath = new Path(src);    
        
        if (hdfs.delete(srcPath, false)) 
        {
            System.out.println("ok, delete file: "+srcPath);
        }
        else
        {
            System.out.println("error,delete file: "+srcPath+" failed!");            
        }
        
        hdfs.close();
    }
Creating a directory
    /*
     * make a new directory in HDFS
     * */
    public void mkdir(String dir) throws IOException
    {
        Configuration conf = new Configuration();
        FileSystem hdfs = FileSystem.get(conf);
        
        Path dirPath = new Path(dir);
        
        if(hdfs.mkdirs(dirPath))
        {
            System.out.println("ok, make dir: "+dir);
        }
        else
        {
            System.out.println("error, make dir: "+dir+" failed!");
        }
        
        hdfs.close();
    }
Reading file contents
    /*
     * read the contents of an HDFS file
     * */
    public byte[] readHdfsFile(String src) throws IOException
    {
        byte[] buffer = null;
        
        Configuration conf = new Configuration();
        FileSystem hdfs = FileSystem.get(conf);
        
        Path srcPath = new Path(src);
        
        if (hdfs.exists(srcPath))
        {
            FSDataInputStream input = hdfs.open(srcPath);
            
            FileStatus state = hdfs.getFileStatus(srcPath);
            
            // getLen() returns a long; cast it to int for the buffer size
            // (this assumes the file is smaller than 2 GB)
            int length = (int) state.getLen();
            buffer = new byte[length];
            
            input.readFully(buffer);
            
            input.close();
                    
        }
        else
        {
            System.out.println("error, file is not existed! Read failed!");    
        }
        
        hdfs.close();
        return buffer;
    }
Main method
    /*
     * main
     * */
    public static void main(String[] args) throws IOException
    {
        //-------------test uploadLocalfile-----------
        String src = "/home/gqy/testFileOperate.txt";
        String dst = "/";
        
        HadoopFileOperate testFileOperate = new HadoopFileOperate();
        testFileOperate.uploadLocalfileToHdfs(src, dst);
        
        //-----------test create HDFS file------------
        FileInputStream file = new FileInputStream(src);
        byte[] content = new byte[file.available()];
        file.read(content);  // fill the content array
        file.close();
        
        String newFileName = "/tmp/testFileOperate/newFile.txt";
        testFileOperate.createNewHdfsFile(newFileName, content);
        
        //-----------test rename HDFS file--------------
        String rename = "/new2.cpp";
        testFileOperate.renameHdfsFile(newFileName, rename);
                
        //----------test make a new dir in Hdfs-------
        String dir = "/tmp/testFileOperate/test";
        testFileOperate.mkdir(dir);
        
        //-----------test delete Hdfs file------------
        testFileOperate.deleteHdfsFile("/tmp/testFileOperate/newFile.txt");
        
        //-----------test read Hdfs file------------
        byte[] readContent = testFileOperate.readHdfsFile(rename);
        if (readContent != null)
        {
            String contentString = new String(readContent);    
            System.out.println("OK,read content: \n"+contentString);
        }
    
    }

 

II. Analysis of Some Errors

   1. Why renaming a file in HDFS fails:

        (1) The given src path is not an HDFS file

       (2) A file with the new name already exists in HDFS

          My feeling is that a rename simply deletes the original file and creates a new one. (In fact, HDFS implements rename as a metadata update on the NameNode rather than a copy, but the two failure conditions above still apply.)
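A sketch of guarding against both failure modes before renaming; checkedRename is a hypothetical helper in the style of the methods above, not part of the original post:

    public void checkedRename(String src, String newName) throws IOException
    {
        Configuration conf = new Configuration();
        FileSystem hdfs = FileSystem.get(conf);

        Path srcPath = new Path(src);
        Path dstPath = new Path(newName);

        if (!hdfs.exists(srcPath))
        {
            System.out.println("error, src is not an HDFS file: "+src);
        }
        else if (hdfs.exists(dstPath))
        {
            System.out.println("error, target name already exists: "+newName);
        }
        else if (hdfs.rename(srcPath, dstPath))
        {
            System.out.println("ok, file: "+src+" renamed to: "+newName);
        }

        hdfs.close();
    }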

   2. File deletion fails (delete):

       In hdfs.delete(src, flag), when src is a directory with contents, flag must be true so that everything is deleted recursively. If flag is false, an exception is thrown.
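For example, deleting the test directory created above requires the recursive flag (a minimal sketch, assuming hdfs is a FileSystem obtained as in the methods of Part I):

    Path dirPath = new Path("/tmp/testFileOperate/test");
    // the second argument must be true for a directory with contents;
    // with false, delete() throws an IOException on a non-empty directory
    hdfs.delete(dirPath, true);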

   3. File upload fails:

          The target path must be one that shows up in the shell command:

               hadoop  fs  -ls  /

          If, at this point, only /home/gqy/hadoop is listed,

               then an upload with target path /home/gqy/tmp will fail, while /home/gqy/hadoop/test will work.
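One way to avoid this (an illustrative sketch, not from the original post, again assuming hdfs is a FileSystem obtained as in Part I) is to make sure the target directory exists in HDFS before uploading:

    Path dstPath = new Path("/home/gqy/tmp");
    // create the target directory first if it is not yet visible in HDFS
    if (!hdfs.exists(dstPath))
    {
        hdfs.mkdirs(dstPath);
    }
    hdfs.copyFromLocalFile(new Path("/home/gqy/testFileOperate.txt"), dstPath);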
