|NO.Z.00055|——————————|BigDataEnd|——|Hadoop&MapReduce.V27|——|Hadoop.v27|Source Code Analysis|DataNode Startup Flow|
I. [Source Code Analysis: DataNode Startup Flow]: the DataNode startup flow
### --- The datanode's main class is DataNode, so start from DataNode.main()
public class DataNode extends ReconfigurableBase
    implements InterDatanodeProtocol, ClientDatanodeProtocol,
        TraceAdminProtocol, DataNodeMXBean, ReconfigurationProtocol {

  public static final Logger LOG = LoggerFactory.getLogger(DataNode.class);

  static {
    HdfsConfiguration.init();
  }

  public static void main(String args[]) {
    if (DFSUtil.parseHelpArgument(args, DataNode.USAGE, System.out, true)) {
      System.exit(0);
    }
    secureMain(args, null);
  }
...
  public static void secureMain(String args[], SecureResources resources) {
    int errorCode = 0;
    try {
      // Print the startup message
      StringUtils.startupShutdownMessage(DataNode.class, args, LOG);
      // Do the main work of creating the datanode
      DataNode datanode = createDataNode(args, null, resources);
      if (datanode != null) {
        datanode.join();
      } else {
        errorCode = 1;
      }
    } catch (Throwable e) {
      LOG.fatal("Exception in secureMain", e);
      terminate(1, e);
    } finally {
      LOG.warn("Exiting Datanode");
      terminate(errorCode);
    }
  }
---------------------------------------------------
  public static DataNode createDataNode(String args[], Configuration conf,
      SecureResources resources) throws IOException {
    // Does most of the initialization work and starts some of the worker threads
    DataNode dn = instantiateDataNode(args, conf, resources);
    if (dn != null) {
      // Start the remaining worker threads
      dn.runDatanodeDaemon();
    }
    return dn;
  }
--------------------------------------------------
  /** Start a single datanode daemon and wait for it to finish.
   *  If this thread is specifically interrupted, it will stop waiting.
   */
  public void runDatanodeDaemon() throws IOException {
    // blockPoolManager.startAll() is also invoked during
    // DataNode.instantiateDataNode() (see below)
    blockPoolManager.startAll();
    dataXceiverServer.start();
    if (localDataXceiverServer != null) {
      localDataXceiverServer.start();
    }
    ipcServer.start();
    startPlugins(conf);
  }
--------------------------------------------------------
  public static DataNode instantiateDataNode(String args[], Configuration conf,
      SecureResources resources) throws IOException {
    if (conf == null)
      conf = new HdfsConfiguration();
    ... // argument checks, etc.
    Collection<StorageLocation> dataLocations = getStorageLocations(conf);
    UserGroupInformation.setConfiguration(conf);
    SecurityUtil.login(conf, DFS_DATANODE_KEYTAB_FILE_KEY,
        DFS_DATANODE_KERBEROS_PRINCIPAL_KEY);
    return makeInstance(dataLocations, conf, resources);
  }
--------------------------------------------------------------------
  // DataNode.makeInstance() starts creating the DataNode
  static DataNode makeInstance(Collection<StorageLocation> dataDirs,
      Configuration conf, SecureResources resources) throws IOException {
    ... // check permissions on the data directories
    assert locations.size() > 0 : "number of data directories should be > 0";
    return new DataNode(conf, locations, resources);
  }
  ...
  DataNode(final Configuration conf,
           final List<StorageLocation> dataDirs,
           final SecureResources resources) throws IOException {
    super(conf);
    ... // parameter setup
    try {
      hostName = getHostName(conf);
      LOG.info("Configured hostname is " + hostName);
      startDataNode(conf, dataDirs, resources);
    } catch (IOException ie) {
      shutdown();
      throw ie;
    }
  }
...
  void startDataNode(Configuration conf,
                     List<StorageLocation> dataDirs,
                     SecureResources resources
                     ) throws IOException {
    ... // parameter setup
    // Initialize DataStorage
    storage = new DataStorage();
    // global DN settings
    // Register the JMX MXBean
    registerMXBean();
    // Initialize DataXceiver (streaming communication); started later in
    // DataNode.runDatanodeDaemon()
    initDataXceiver(conf);
    // Start the InfoServer (Web UI)
    startInfoServer(conf);
    // Start the JvmPauseMonitor (monitors the JVM for pauses; results can be
    // queried via JMX)
    pauseMonitor = new JvmPauseMonitor(conf);
    pauseMonitor.start();
    ... // omitted
    // Initialize the IpcServer (RPC communication); started later in
    // DataNode.runDatanodeDaemon()
    initIpcServer(conf);
    metrics = DataNodeMetrics.create(conf, getDisplayName());
    metrics.getJvmMetrics().setPauseMonitor(pauseMonitor);
    // Initialize following the namespace (nameservice) / namenode structure
    blockPoolManager = new BlockPoolManager(this);
    blockPoolManager.refreshNamenodes(conf);
    ... // omitted
  }
// BlockPoolManager abstracts the block storage service the datanode provides.
// It is organized by namespace (nameservice) and namenode.
// BlockPoolManager#refreshNamenodes():
// Besides being called during initialization, a refresh can also be triggered
// by a command the namenode delivers through the datanode heartbeat.
// (A configuration sketch follows the refreshNamenodes() excerpt below.)
void refreshNamenodes(Configuration conf)
    throws IOException {
  LOG.info("Refresh request received for nameservices: " +
      conf.get(DFSConfigKeys.DFS_NAMESERVICES));
  Map<String, Map<String, InetSocketAddress>> newAddressMap =
      DFSUtil.getNNServiceRpcAddressesForCluster(conf);
  synchronized (refreshNamenodesLock) {
    doRefreshNamenodes(newAddressMap);
  }
}
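-------------------------------------------------------
To make the shape of newAddressMap concrete, here is a minimal, hypothetical sketch (the nameservice id "ns1", the namenode ids "nn1"/"nn2" and the hostnames are made-up values, not taken from the article): an HA nameservice configured through dfs.nameservices, dfs.ha.namenodes.* and dfs.namenode.rpc-address.* yields a two-level map keyed first by nameservice id and then by namenode id, which is exactly what doRefreshNamenodes() iterates over.

import java.net.InetSocketAddress;
import java.util.Map;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hdfs.DFSUtil;
import org.apache.hadoop.hdfs.HdfsConfiguration;

public class NnAddressMapSketch {
  public static void main(String[] args) throws Exception {
    // Illustrative HA nameservice configuration; ids and hosts are made up.
    Configuration conf = new HdfsConfiguration();
    conf.set("dfs.nameservices", "ns1");
    conf.set("dfs.ha.namenodes.ns1", "nn1,nn2");
    conf.set("dfs.namenode.rpc-address.ns1.nn1", "nn1.example.com:8020");
    conf.set("dfs.namenode.rpc-address.ns1.nn2", "nn2.example.com:8020");

    // Nested map: nameserviceId -> (namenodeId -> NN RPC address)
    Map<String, Map<String, InetSocketAddress>> map =
        DFSUtil.getNNServiceRpcAddressesForCluster(conf);
    System.out.println(map); // e.g. {ns1={nn1=nn1.example.com:8020, nn2=...}}
  }
}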
-------------------------------------------------------
private void doRefreshNamenodes(
    Map<String, Map<String, InetSocketAddress>> addrMap) throws IOException {
  assert Thread.holdsLock(refreshNamenodesLock);
  Set<String> toRefresh = Sets.newLinkedHashSet();
  Set<String> toAdd = Sets.newLinkedHashSet();
  Set<String> toRemove;
  synchronized (this) {
    // Step 1. For each of the new nameservices, figure out whether
    // it's an update of the set of NNs for an existing NS,
    // or an entirely new nameservice.
    for (String nameserviceId : addrMap.keySet()) {
      if (bpByNameserviceId.containsKey(nameserviceId)) {
        toRefresh.add(nameserviceId);
      } else {
        toAdd.add(nameserviceId);
      }
    }
    ... // omitted
    // Step 2. Start new nameservices
    if (!toAdd.isEmpty()) {
      LOG.info("Starting BPOfferServices for nameservices: " +
          Joiner.on(",").useForNull("<default>").join(toAdd));
      for (String nsToAdd : toAdd) {
        ArrayList<InetSocketAddress> addrs =
            Lists.newArrayList(addrMap.get(nsToAdd).values());
        // Create one BPOfferService per namespace
        BPOfferService bpos = createBPOS(addrs);
        bpByNameserviceId.put(nsToAdd, bpos);
        offerServices.add(bpos);
      }
    }
    // Then start all BPOfferServices via startAll()
    startAll();
  }
  ... // omitted
}
------------------------------------------------
protected BPOfferService createBPOS(List<InetSocketAddress> nnAddrs) {
  return new BPOfferService(nnAddrs, dn);
}

BPOfferService(List<InetSocketAddress> nnAddrs, DataNode dn) {
  Preconditions.checkArgument(!nnAddrs.isEmpty(),
      "Must pass at least one NN.");
  this.dn = dn;
  for (InetSocketAddress addr : nnAddrs) {
    this.bpServices.add(new BPServiceActor(addr, this));
  }
}
--------------------------------------------
// BlockPoolManager#startAll() starts all BPOfferService instances
// (in practice, it starts all of the BPServiceActors; a sketch of
// BPOfferService.start() follows the excerpt below).
synchronized void startAll() throws IOException {
  try {
    UserGroupInformation.getLoginUser().doAs(
        new PrivilegedExceptionAction<Object>() {
          @Override
          public Object run() throws Exception {
            for (BPOfferService bpos : offerServices) {
              bpos.start();
            }
            return null;
          }
        });
  } catch (InterruptedException ex) {
    IOException ioe = new IOException();
    ioe.initCause(ex.getCause());
    throw ioe;
  }
}
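-------------------------------------------------------
For reference, the bpos.start() call in the loop above simply delegates to the BPServiceActors created in the BPOfferService constructor. A minimal sketch of that delegation (simplified; the exact code differs between Hadoop versions):

// Simplified sketch of BPOfferService.start(); not the literal source.
void start() {
  // Each BPServiceActor owns a worker thread that talks to one namenode of
  // this nameservice (e.g. the active and the standby in an HA setup).
  for (BPServiceActor actor : bpServices) {
    actor.start();
  }
}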
-------------------------------------------------------
// In the main datanode startup flow, several kinds of worker threads are
// started, including the InfoServer, the JvmPauseMonitor, and the
// BPServiceActors. The most important of these are the BPServiceActor
// threads: they are what actually communicates with the namenodes on the
// datanode's behalf (a simplified sketch of the actor's lifecycle follows).
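As a rough sketch of the BPServiceActor lifecycle (heavily simplified; the method names follow the Hadoop 2.x source, but the retry handling and other details vary by version): the actor first handshakes and registers with its namenode, then loops, sending heartbeats and block reports and executing whatever commands the namenode returns.

// Heavily simplified sketch of BPServiceActor.run(); not the literal source.
public void run() {
  try {
    // 1. Handshake: fetch the NamespaceInfo, let the BPOfferService verify the
    //    cluster/block-pool IDs (which triggers DataNode.initBlockPool() for
    //    the first namenode that answers), then register this datanode.
    connectToNNAndHandshake();
  } catch (IOException ioe) {
    // retry/backoff logic elided
  }
  while (shouldRun()) {
    try {
      // 2. Steady state: send heartbeats and incremental/full block reports,
      //    and process the DatanodeCommands returned by the namenode.
      offerService();
    } catch (Exception e) {
      // log, sleep, and retry (elided)
    }
  }
}
-------------------------------------------------------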
// DataNode#initBlockPool():
/**
* One of the Block Pools has successfully connected to its NN.
* This initializes the local storage for that block pool,
* checks consistency of the NN's cluster ID, etc.
*
* If this is the first block pool to register, this also initializes
* the datanode-scoped storage.
*
* @param bpos Block pool offer service
* @throws IOException if the NN is inconsistent with the local storage.
*/
void initBlockPool(BPOfferService bpos) throws IOException {
  ... // omitted
  // Register the block pool with the BlockPoolManager
  blockPoolManager.addBlockPool(bpos);
  // Do the initial setup of the storage structures
  initStorage(nsInfo);
  ... // check for disk damage
  // Start the periodic scanners
  initPeriodicScanners(conf);
  // Add the block pool to FsDatasetImpl and continue initializing the storage
  data.addBlockPool(nsInfo.getBlockPoolID(), conf);
}
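-------------------------------------------------------
For context on where initBlockPool() is invoked: it is not called directly from the startup code above, but from the handshake performed by each BPServiceActor. The following is a hedged sketch of that call path, reconstructed from the Hadoop 2.x source (exact method signatures differ across versions):

// Simplified sketch of the path that reaches DataNode.initBlockPool();
// not the literal source.
// In BPServiceActor:
private void connectToNNAndHandshake() throws IOException {
  bpNamenode = dn.connectToNN(nnAddr);              // build the NN RPC proxy
  NamespaceInfo nsInfo = retrieveNamespaceInfo();   // handshake phase 1
  bpos.verifyAndSetNamespaceInfo(nsInfo);           // may call dn.initBlockPool(this)
  register(nsInfo);                                 // handshake phase 2: registerDatanode
}

// In BPOfferService:
void verifyAndSetNamespaceInfo(NamespaceInfo nsInfo) throws IOException {
  if (this.bpNSInfo == null) {
    this.bpNSInfo = nsInfo;
    dn.initBlockPool(this);   // first NN to answer initializes the block pool
  } else {
    // check that blockPoolID / namespaceID / clusterID match the stored info (elided)
  }
}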