[转载]hdfs c/c++ API

原文链接:http://blog.csdn.net/sprintfwater/article/details/8996214

1.建立、关闭与HDFS连接:hdfsConnect()、hdfsConnectAsUser()、hdfsDisconnect()。hdfsConnect()实际上是直接调用hdfsConnectAsUser。

2.打开、关闭HDFS文件:hdfsOpenFile()、hdfsCloseFile()。当用hdfsOpenFile()创建文件时,可以指定replication和blocksize参数。写打开一个文件时,隐含O_TRUNC标志,文件会被截断,写入是从文件头开始的。

3.读HDFS文件:hdfsRead()、hdfsPread()。两个函数都有可能返回少于用户要求的字节数,此时可以再次调用这两个函数读入剩下的部分(类似APUE中的readn实现);只有在两个函数返回零时,我们才能断定到了文件末尾。

4.写HDFS文件:hdfsWrite()。HDFS不支持随机写,只能是从文件头顺序写入。

5.查询HDFS文件信息:hdfsGetPathInfo()

6.查询和设置HDFS文件读写偏移量:hdfsSeek()、hdfsTell()

7.查询数据块所在节点信息:hdfsGetHosts()。返回一个或多个数据块所在数据节点的信息,一个数据块可能存在多个数据节点上。

8.libhdfs中的函数是通过jni调用JAVA虚拟机,在虚拟机中构造对应的HDFS的JAVA类,然后反射调用该类的功能函数。总会发生JVM和程序之间内存拷贝的动作,性能方面值得注意。

9.HDFS不支持多个客户端同时写入的操作,无文件或是记录锁的概念。

10.建议只有超大文件才应该考虑放在HDFS上,而且最好对文件的访问是写一次,读多次。小文件不应该考虑放在HDFS上,得不偿失!

  1 /**
  2  * Licensed to the Apache Software Foundation (ASF) under one
  3  * or more contributor license agreements.  See the NOTICE file
  4  * distributed with this work for additional information
  5  * regarding copyright ownership.  The ASF licenses this file
  6  * to you under the Apache License, Version 2.0 (the
  7  * "License"); you may not use this file except in compliance
  8  * with the License.  You may obtain a copy of the License at
  9  *
 10  *     http://www.apache.org/licenses/LICENSE-2.0
 11  *
 12  * Unless required by applicable law or agreed to in writing, software
 13  * distributed under the License is distributed on an "AS IS" BASIS,
 14  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 15  * See the License for the specific language governing permissions and
 16  * limitations under the License.
 17  */
 18 
 19 #ifndef LIBHDFS_HDFS_H
 20 #define LIBHDFS_HDFS_H
 21 
 22 #include <sys/types.h>
 23 #include <sys/stat.h>
 24 
 25 #include <fcntl.h>
 26 #include <stdio.h>
 27 #include <stdint.h>
 28 #include <string.h>
 29 #include <stdlib.h>
 30 #include <time.h>
 31 #include <errno.h>
 32 
 33 #include <jni.h>
 34 
 35 #ifndef O_RDONLY
 36 #define O_RDONLY 1
 37 #endif
 38 
 39 #ifndef O_WRONLY 
 40 #define O_WRONLY 2
 41 #endif
 42 
 43 #ifndef EINTERNAL
 44 #define EINTERNAL 255 
 45 #endif
 46 
 47 
 48 /** All APIs set errno to meaningful values */
 49 #ifdef __cplusplus
 50 extern  "C" {
 51 #endif
 52 
 53     /**
 54      * Some utility decls used in libhdfs.
 55      */
 56 
 57     typedef int32_t   tSize; /// size of data for read/write io ops 
 58     typedef time_t    tTime; /// time type
 59     typedef int64_t   tOffset;/// offset within the file
 60     typedef uint16_t  tPort; /// port
 61     typedef enum tObjectKind {
 62         kObjectKindFile = 'F',
 63         kObjectKindDirectory = 'D',
 64     } tObjectKind;
 65 
 66 
 67     /**
 68      * The C reflection of org.apache.hadoop.fs.FileSystem .
 69      */
 70     typedef void* hdfsFS;
 71 
 72     
 73     /**
 74      * The C equivalent of org.apache.hadoop.fs.FSData(Input|Output)Stream .
 75      */
 76     enum hdfsStreamType
 77     {
 78         UNINITIALIZED = 0,
 79         INPUT = 1,
 80         OUTPUT = 2,
 81     };
 82 
 83     
 84     /**
 85      * The 'file-handle' to a file in hdfs.
 86      */
 87     struct hdfsFile_internal {
 88         void* file;
 89         enum hdfsStreamType type;
 90     };
 91     typedef struct hdfsFile_internal* hdfsFile;
 92       
 93 
 94     /** 
 95      * hdfsConnect - Connect to a hdfs file system.
 96      * Connect to the hdfs.
 97      * @param host A string containing either a host name, or an ip address
 98      * of the namenode of a hdfs cluster. 'host' should be passed as NULL if
 99      * you want to connect to local filesystem. 'host' should be passed as
100      * 'default' (and port as 0) to use the 'configured' filesystem
101      * (hadoop-site/hadoop-default.xml).
102      * @param port The port on which the server is listening.
103      * @return Returns a handle to the filesystem or NULL on error.
104      */
105     hdfsFS hdfsConnect(const char* host, tPort port);
106 
107 
108     /** 
109      * hdfsDisconnect - Disconnect from the hdfs file system.
110      * Disconnect from hdfs.
111      * @param fs The configured filesystem handle.
112      * @return Returns 0 on success, -1 on error.  
113      */
114     int hdfsDisconnect(hdfsFS fs);
115         
116 
117     /** 
118      * hdfsOpenFile - Open a hdfs file in given mode.
119      * @param fs The configured filesystem handle.
120      * @param path The full path to the file.
121      * @param flags Either O_RDONLY or O_WRONLY, for read-only or write-only.
122      * @param bufferSize Size of buffer for read/write - pass 0 if you want
123      * to use the default configured values.
124      * @param replication Block replication - pass 0 if you want to use
125      * the default configured values.
126      * @param blocksize Size of block - pass 0 if you want to use the
127      * default configured values.
128      * @return Returns the handle to the open file or NULL on error.
129      */
130     hdfsFile hdfsOpenFile(hdfsFS fs, const char* path, int flags,
131                           int bufferSize, short replication, tSize blocksize);
132 
133 
134     /** 
135      * hdfsCloseFile - Close an open file. 
136      * @param fs The configured filesystem handle.
137      * @param file The file handle.
138      * @return Returns 0 on success, -1 on error.  
139      */
140     int hdfsCloseFile(hdfsFS fs, hdfsFile file);
141 
142 
143     /** 
144      * hdfsExists - Checks if a given path exists on the filesystem 
145      * @param fs The configured filesystem handle.
146      * @param path The path to look for
147      * @return Returns 0 on success, -1 on error.  
148      */
149     int hdfsExists(hdfsFS fs, const char *path);
150 
151 
152     /** 
153      * hdfsSeek - Seek to given offset in file. 
154      * This works only for files opened in read-only mode. 
155      * @param fs The configured filesystem handle.
156      * @param file The file handle.
157      * @param desiredPos Offset into the file to seek into.
158      * @return Returns 0 on success, -1 on error.  
159      */
160     int hdfsSeek(hdfsFS fs, hdfsFile file, tOffset desiredPos); 
161 
162 
163     /** 
164      * hdfsTell - Get the current offset in the file, in bytes.
165      * @param fs The configured filesystem handle.
166      * @param file The file handle.
167      * @return Current offset, -1 on error.
168      */
169     tOffset hdfsTell(hdfsFS fs, hdfsFile file);
170 
171 
172     /** 
173      * hdfsRead - Read data from an open file.
174      * @param fs The configured filesystem handle.
175      * @param file The file handle.
176      * @param buffer The buffer to copy read bytes into.
177      * @param length The length of the buffer.
178      * @return Returns the number of bytes actually read, possibly less
179      * than length; -1 on error.
180      */
181     tSize hdfsRead(hdfsFS fs, hdfsFile file, void* buffer, tSize length);
182 
183 
184     /** 
185      * hdfsPread - Positional read of data from an open file.
186      * @param fs The configured filesystem handle.
187      * @param file The file handle.
188      * @param position Position from which to read
189      * @param buffer The buffer to copy read bytes into.
190      * @param length The length of the buffer.
191      * @return Returns the number of bytes actually read, possibly less than
192      * length; -1 on error.
193      */
194     tSize hdfsPread(hdfsFS fs, hdfsFile file, tOffset position,
195                     void* buffer, tSize length);
196 
197 
198     /** 
199      * hdfsWrite - Write data into an open file.
200      * @param fs The configured filesystem handle.
201      * @param file The file handle.
202      * @param buffer The data.
203      * @param length The no. of bytes to write. 
204      * @return Returns the number of bytes written, -1 on error.
205      */
206     tSize hdfsWrite(hdfsFS fs, hdfsFile file, const void* buffer,
207                     tSize length);
208 
209 
210     /** 
211      * hdfsFlush - Flush the data. 
212      * @param fs The configured filesystem handle.
213      * @param file The file handle.
214      * @return Returns 0 on success, -1 on error. 
215      */
216     int hdfsFlush(hdfsFS fs, hdfsFile file);
217 
218 
219     /**
220      * hdfsAvailable - Number of bytes that can be read from this
221      * input stream without blocking.
222      * @param fs The configured filesystem handle.
223      * @param file The file handle.
224      * @return Returns available bytes; -1 on error. 
225      */
226     int hdfsAvailable(hdfsFS fs, hdfsFile file);
227 
228 
229     /**
230      * hdfsCopy - Copy file from one filesystem to another.
231      * @param srcFS The handle to source filesystem.
232      * @param src The path of source file. 
233      * @param dstFS The handle to destination filesystem.
234      * @param dst The path of destination file. 
235      * @return Returns 0 on success, -1 on error. 
236      */
237     int hdfsCopy(hdfsFS srcFS, const char* src, hdfsFS dstFS, const char* dst);
238 
239 
240     /**
241      * hdfsMove - Move file from one filesystem to another.
242      * @param srcFS The handle to source filesystem.
243      * @param src The path of source file. 
244      * @param dstFS The handle to destination filesystem.
245      * @param dst The path of destination file. 
246      * @return Returns 0 on success, -1 on error. 
247      */
248     int hdfsMove(hdfsFS srcFS, const char* src, hdfsFS dstFS, const char* dst);
249 
250 
251     /**
252      * hdfsDelete - Delete file. 
253      * @param fs The configured filesystem handle.
254      * @param path The path of the file. 
255      * @return Returns 0 on success, -1 on error. 
256      */
257     int hdfsDelete(hdfsFS fs, const char* path);
258 
259 
260     /**
261      * hdfsRename - Rename file. 
262      * @param fs The configured filesystem handle.
263      * @param oldPath The path of the source file. 
264      * @param newPath The path of the destination file. 
265      * @return Returns 0 on success, -1 on error. 
266      */
267     int hdfsRename(hdfsFS fs, const char* oldPath, const char* newPath);
268 
269 
270     /** 
271      * hdfsGetWorkingDirectory - Get the current working directory for
272      * the given filesystem.
273      * @param fs The configured filesystem handle.
274      * @param buffer The user-buffer to copy path of cwd into. 
275      * @param bufferSize The length of user-buffer.
276      * @return Returns buffer, NULL on error.
277      */
278     char* hdfsGetWorkingDirectory(hdfsFS fs, char *buffer, size_t bufferSize);
279 
280 
281     /** 
282      * hdfsSetWorkingDirectory - Set the working directory. All relative
283      * paths will be resolved relative to it.
284      * @param fs The configured filesystem handle.
285      * @param path The path of the new 'cwd'. 
286      * @return Returns 0 on success, -1 on error. 
287      */
288     int hdfsSetWorkingDirectory(hdfsFS fs, const char* path);
289 
290 
291     /** 
292      * hdfsCreateDirectory - Make the given file and all non-existent
293      * parents into directories.
294      * @param fs The configured filesystem handle.
295      * @param path The path of the directory. 
296      * @return Returns 0 on success, -1 on error. 
297      */
298     int hdfsCreateDirectory(hdfsFS fs, const char* path);
299 
300 
301     /** 
302      * hdfsSetReplication - Set the replication of the specified file.
303      * @param fs The configured filesystem handle.
304      * @param path The path of the file. 
305      * @param replication The replication value to set for the file.
306      * @return Returns 0 on success, -1 on error. 
307      */
308     int hdfsSetReplication(hdfsFS fs, const char* path, int16_t replication);
309 
310 
311     /** 
312      * hdfsFileInfo - Information about a file/directory.
313      */
314     typedef struct  {
315         tObjectKind mKind;   /* file or directory */
316         char *mName;         /* the name of the file */
317         tTime mLastMod;      /* the last modification time for the file*/
318         tOffset mSize;       /* the size of the file in bytes */
319         short mReplication;    /* the count of replicas */
320         tOffset mBlockSize;  /* the block size for the file */
321     } hdfsFileInfo;
322 
323 
324     /** 
325      * hdfsListDirectory - Get list of files/directories for a given
326      * directory-path. hdfsFreeFileInfo should be called to deallocate memory. 
327      * @param fs The configured filesystem handle.
328      * @param path The path of the directory. 
329      * @param numEntries Set to the number of files/directories in path.
330      * @return Returns a dynamically-allocated array of hdfsFileInfo
331      * objects; NULL on error.
332      */
333     hdfsFileInfo *hdfsListDirectory(hdfsFS fs, const char* path,
334                                     int *numEntries);
335 
336 
337     /** 
338      * hdfsGetPathInfo - Get information about a path as a (dynamically
339      * allocated) single hdfsFileInfo struct. hdfsFreeFileInfo should be
340      * called when the pointer is no longer needed.
341      * @param fs The configured filesystem handle.
342      * @param path The path of the file. 
343      * @return Returns a dynamically-allocated hdfsFileInfo object;
344      * NULL on error.
345      */
346     hdfsFileInfo *hdfsGetPathInfo(hdfsFS fs, const char* path);
347 
348 
349     /** 
350      * hdfsFreeFileInfo - Free up the hdfsFileInfo array (including fields) 
351      * @param hdfsFileInfo The array of dynamically-allocated hdfsFileInfo
352      * objects.
353      * @param numEntries The size of the array.
354      */
355     void hdfsFreeFileInfo(hdfsFileInfo *hdfsFileInfo, int numEntries);
356 
357 
358     /** 
359      * hdfsGetHosts - Get hostnames where a particular block (determined by
360      * start & length) of a file is stored. The last element in the array
361      * is NULL. Due to replication, a single block could be present on
362      * multiple hosts.
363      * @param fs The configured filesystem handle.
364      * @param path The path of the file. 
365      * @param start The start of the block.
366      * @param length The length of the block.
367      * @return Returns a dynamically-allocated 2-d array of blocks-hosts;
368      * NULL on error.
369      */
370     char*** hdfsGetHosts(hdfsFS fs, const char* path, 
371             tOffset start, tOffset length);
372 
373 
374     /** 
375      * hdfsFreeHosts - Free up the structure returned by hdfsGetHosts
376      * @param blockHosts The dynamically-allocated 2-d array of
377      * blocks-hosts returned by hdfsGetHosts; the last element in the
378      * array is NULL.
379      */
380     void hdfsFreeHosts(char ***blockHosts);
381 
382 
383     /** 
384      * hdfsGetDefaultBlockSize - Get the optimum blocksize.
385      * @param fs The configured filesystem handle.
386      * @return Returns the blocksize; -1 on error. 
387      */
388     tOffset hdfsGetDefaultBlockSize(hdfsFS fs);
389 
390 
391     /** 
392      * hdfsGetCapacity - Return the raw capacity of the filesystem.  
393      * @param fs The configured filesystem handle.
394      * @return Returns the raw-capacity; -1 on error. 
395      */
396     tOffset hdfsGetCapacity(hdfsFS fs);
397 
398 
399     /** 
400      * hdfsGetUsed - Return the total raw size of all files in the filesystem.
401      * @param fs The configured filesystem handle.
402      * @return Returns the total-size; -1 on error. 
403      */
404     tOffset hdfsGetUsed(hdfsFS fs);
405     
406 #ifdef __cplusplus
407 }
408 #endif
409 
410 #endif /*LIBHDFS_HDFS_H*/

 

posted @ 2014-08-24 16:53  caoyingjie  阅读(3698)  评论(0编辑  收藏  举报