hadoop FileSplit

/** A section of an input file. Returned by {@link
* InputFormat#getSplits(JobContext)} and passed to
* {@link InputFormat#createRecordReader(InputSplit,TaskAttemptContext)}. 
* 
* 文件的一部分,通过InputFormat#getSplits(JobContext)生成
* 作为参数生产RecordReader:InputFormat#createRecordReader(InputSplit,TaskAttemptContext)
* 实现了InputSplit接口
*/
@InterfaceAudience.Public
@InterfaceStability.Stable
public class FileSplit extends InputSplit implements Writable {
private Path file;
private long start;
private long length;
private String[] hosts;
private SplitLocationInfo[] hostInfos;

public FileSplit() {}

/** Constructs a split with host information
*
* @param file the file name。 文件名称
* @param start the position of the first byte in the file to process。第一个byte的偏移量
* @param length the number of bytes in the file to process。 split的长度
* @param hosts the list of hosts containing the block, possibly null。 split所在的主机列表
*/
public FileSplit(Path file, long start, long length, String[] hosts) {
this.file = file;
this.start = start;
this.length = length;
this.hosts = hosts;
}

/** Constructs a split with host and cached-blocks information
*
* @param file the file name。 文件名称
* @param start the position of the first byte in the file to process。第一个byte的偏移量
* @param length the number of bytes in the file to process split的长度
* @param hosts the list of hosts containing the block split所在的主机列表
* @param inMemoryHosts the list of hosts containing the block in memory 在内存中保存block的机器列表
*/
public FileSplit(Path file, long start, long length, String[] hosts,
String[] inMemoryHosts) {
this(file, start, length, hosts);
hostInfos = new SplitLocationInfo[hosts.length];
for (int i = 0; i < hosts.length; i++) {
// because N will be tiny, scanning is probably faster than a HashSet
boolean inMemory = false;
for (String inMemoryHost : inMemoryHosts) {
if (inMemoryHost.equals(hosts[i])) {
inMemory = true;
break;
}
}
hostInfos[i] = new SplitLocationInfo(hosts[i], inMemory);
}
}

/** The file containing this split's data. */
public Path getPath() { return file; }

/** The position of the first byte in the file to process. */
public long getStart() { return start; }

/** The number of bytes in the file to process. */
@Override
public long getLength() { return length; }

@Override
public String toString() { return file + ":" + start + "+" + length; }

////////////////////////////////////////////
// Writable methods
////////////////////////////////////////////

@Override
public void write(DataOutput out) throws IOException {
Text.writeString(out, file.toString());
out.writeLong(start);
out.writeLong(length);
}

@Override
public void readFields(DataInput in) throws IOException {
file = new Path(Text.readString(in));
start = in.readLong();
length = in.readLong();
hosts = null;
}

@Override
public String[] getLocations() throws IOException {
if (this.hosts == null) {
return new String[]{};
} else {
return this.hosts;
}
}

@Override
@Evolving
public SplitLocationInfo[] getLocationInfo() throws IOException {
return hostInfos;
}
}

 

posted @ 2018-07-20 10:49  天添  阅读(938)  评论(0编辑  收藏  举报