|NO.Z.00055|——————————|BigDataEnd|——|Hadoop&MapReduce.V27|——|Hadoop.v27|Source Code Analysis|DataNode Startup Flow|

1. [Source Code Analysis] DataNode startup flow
### --- The datanode's main class is DataNode; start from DataNode.main()

public class DataNode extends ReconfigurableBase
        implements InterDatanodeProtocol, ClientDatanodeProtocol,
        TraceAdminProtocol, DataNodeMXBean, ReconfigurationProtocol {
    public static final Logger LOG = LoggerFactory.getLogger(DataNode.class);
    static{
        HdfsConfiguration.init();
    }
    public static void main(String args[]) {
        if (DFSUtil.parseHelpArgument(args, DataNode.USAGE, System.out, true)) {
            System.exit(0);
        }
        secureMain(args, null);
    }
...
    public static void secureMain(String args[], SecureResources resources) {
        int errorCode = 0;
        try {
            // Print the startup/shutdown banner in the log
            StringUtils.startupShutdownMessage(DataNode.class, args, LOG);
            // Do the main work of creating the datanode
            DataNode datanode = createDataNode(args, null, resources);
            if (datanode != null) {
                datanode.join();
            } else {
                errorCode = 1;
            }
        } catch (Throwable e) {
            LOG.error("Exception in secureMain", e);
            terminate(1, e);
        } finally {
            LOG.warn("Exiting Datanode");
            terminate(errorCode);
        }
    }
---------------------------------------------------
    public static DataNode createDataNode(String args[], Configuration conf,
                                          SecureResources resources) throws IOException {
        // Do most of the initialization work and start some of the worker threads
        DataNode dn = instantiateDataNode(args, conf, resources);
        if (dn != null) {
            // Start the remaining worker threads
            dn.runDatanodeDaemon();
        }
        return dn;
    }
--------------------------------------------------
    /** Start a single datanode daemon and wait for it to finish.
     * If this thread is specifically interrupted, it will stop waiting.
     */
    public void runDatanodeDaemon() throws IOException {
        // blockPoolManager.startAll() is also invoked during DataNode.instantiateDataNode() (see below)
        blockPoolManager.startAll();
        dataXceiverServer.start();
        if (localDataXceiverServer != null) {
            localDataXceiverServer.start();
        }
        ipcServer.start();
        startPlugins(conf);
    }
--------------------------------------------------------
    public static DataNode instantiateDataNode(String args[], Configuration conf,
                                               SecureResources resources) throws IOException {
        if (conf == null)
            conf = new HdfsConfiguration();
...     // argument checks, etc.
        Collection<StorageLocation> dataLocations = getStorageLocations(conf);
        UserGroupInformation.setConfiguration(conf);
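        // Kerberos login using dfs.datanode.keytab.file / dfs.datanode.kerberos.principal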
        SecurityUtil.login(conf, DFS_DATANODE_KEYTAB_FILE_KEY,
                DFS_DATANODE_KERBEROS_PRINCIPAL_KEY);
        return makeInstance(dataLocations, conf, resources);
    }
--------------------------------------------------------------------
    // DataNode.makeInstance() starts creating the DataNode
    static DataNode makeInstance(Collection<StorageLocation> dataDirs,
                                 Configuration conf, SecureResources resources) throws IOException {
...     // check permissions on the data directories (the elided code derives locations from dataDirs)
        assert locations.size() > 0 : "number of data directories should be > 0";
        return new DataNode(conf, locations, resources);
    }
...
    DataNode(final Configuration conf,
             final List<StorageLocation> dataDirs,
             final SecureResources resources) throws IOException {
        super(conf);
...     // field initialization / parameter setup
        try {
            hostName = getHostName(conf);
            LOG.info("Configured hostname is " + hostName);
            startDataNode(conf, dataDirs, resources);
        } catch (IOException ie) {
            shutdown();
            throw ie;
        }
    }
...
    void startDataNode(Configuration conf,
                       List<StorageLocation> dataDirs,
                       SecureResources resources
    ) throws IOException {
... // parameter setup
        // Initialize DataStorage
        storage = new DataStorage();
        // global DN settings
        // Register the MXBean with JMX
        registerMXBean();
        // Initialize the DataXceiver server (streaming data transfer); started in DataNode.runDatanodeDaemon()
        initDataXceiver(conf);
        // Start the InfoServer (web UI)
        startInfoServer(conf);
        // Start the JvmPauseMonitor (detects JVM pauses; can be queried via JMX)
        pauseMonitor = new JvmPauseMonitor(conf);
        pauseMonitor.start();
...     // omitted
        // Initialize the IPC server (RPC); started in DataNode.runDatanodeDaemon()
        initIpcServer(conf);
        metrics = DataNodeMetrics.create(conf, getDisplayName());
        metrics.getJvmMetrics().setPauseMonitor(pauseMonitor);
        // Initialized per the namespace (nameservice) / namenode structure
        blockPoolManager = new BlockPoolManager(this);
        blockPoolManager.refreshNamenodes(conf);
...     // omitted
    }
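
A side note on the JvmPauseMonitor started in startDataNode() above: it is a small Hadoop utility that detects long JVM (typically GC) pauses by repeatedly sleeping for a short, fixed interval and measuring how much longer the sleep actually took. Below is a minimal sketch of the same idea, as a hypothetical stand-alone class rather than the Hadoop implementation:

// Hypothetical, simplified pause detector illustrating the idea behind JvmPauseMonitor.
public class SimplePauseMonitor implements Runnable {
    private static final long SLEEP_MS = 500;            // nominal sleep interval
    private static final long WARN_THRESHOLD_MS = 1000;  // extra delay treated as a pause

    @Override
    public void run() {
        while (!Thread.currentThread().isInterrupted()) {
            long start = System.nanoTime();
            try {
                Thread.sleep(SLEEP_MS);
            } catch (InterruptedException e) {
                return; // stop when interrupted
            }
            long elapsedMs = (System.nanoTime() - start) / 1_000_000;
            long extraMs = elapsedMs - SLEEP_MS;
            if (extraMs > WARN_THRESHOLD_MS) {
                // The thread was delayed far beyond its sleep time,
                // most likely by a GC pause or a stalled host.
                System.err.println("Detected JVM pause of approximately " + extraMs + " ms");
            }
        }
    }

    public static void main(String[] args) {
        new Thread(new SimplePauseMonitor(), "pause-monitor").start();
    }
}
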
    // BlockPoolManager abstracts the block storage service that the datanode provides.
    // It is organized per namespace (nameservice) and per namenode.
    // BlockPoolManager#refreshNamenodes():
    // besides being invoked during initialization, a refresh can also be triggered by a
    // command that the namenode delivers through the datanode heartbeat.
    void refreshNamenodes(Configuration conf)
            throws IOException {
        LOG.info("Refresh request received for nameservices: " + conf.get
                (DFSConfigKeys.DFS_NAMESERVICES));
        Map<String, Map<String, InetSocketAddress>> newAddressMap = DFSUtil
                .getNNServiceRpcAddressesForCluster(conf);
        synchronized (refreshNamenodesLock) {
            doRefreshNamenodes(newAddressMap);
        }
    }
-------------------------------------------------------
    private void doRefreshNamenodes(
            Map<String, Map<String, InetSocketAddress>> addrMap) throws IOException {
        assert Thread.holdsLock(refreshNamenodesLock);
        Set<String> toRefresh = Sets.newLinkedHashSet();
        Set<String> toAdd = Sets.newLinkedHashSet();
        Set<String> toRemove;
        synchronized (this) {
        // Step 1. For each of the new nameservices, figure out whether
        // it's an update of the set of NNs for an existing NS,
        // or an entirely new nameservice.
            for (String nameserviceId : addrMap.keySet()) {
                if (bpByNameserviceId.containsKey(nameserviceId)) {
                    toRefresh.add(nameserviceId);
                } else {
                    toAdd.add(nameserviceId);
                }
            }
...     // omitted
        // Step 2. Start new nameservices
            if (!toAdd.isEmpty()) {
                LOG.info("Starting BPOfferServices for nameservices: " +
                        Joiner.on(",").useForNull("<default>").join(toAdd));
                for (String nsToAdd : toAdd) {
                    ArrayList<InetSocketAddress> addrs =
                            Lists.newArrayList(addrMap.get(nsToAdd).values());
                    // Create a BPOfferService for each nameservice
                    BPOfferService bpos = createBPOS(addrs);
                    bpByNameserviceId.put(nsToAdd, bpos);
                    offerServices.add(bpos);
                }
            }
            // Then start all of the BPOfferServices via startAll()
            startAll();
        }
...         // omitted
    }
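
To make the addrMap handled above concrete: in a federated, HA-enabled cluster with two nameservices, DFSUtil.getNNServiceRpcAddressesForCluster(conf) produces a nested nameservice -> (namenode id -> RPC address) map, roughly like the hand-built example below (the nameservice ids, namenode ids and hostnames are made up; 8020 is the customary namenode RPC port):

import java.net.InetSocketAddress;
import java.util.HashMap;
import java.util.Map;

// Hand-built illustration of the map that doRefreshNamenodes() iterates over.
public class AddressMapExample {
    public static void main(String[] args) {
        Map<String, Map<String, InetSocketAddress>> newAddressMap = new HashMap<>();

        Map<String, InetSocketAddress> ns1 = new HashMap<>();
        ns1.put("nn1", new InetSocketAddress("nn1.example.com", 8020));
        ns1.put("nn2", new InetSocketAddress("nn2.example.com", 8020));
        newAddressMap.put("ns1", ns1);

        Map<String, InetSocketAddress> ns2 = new HashMap<>();
        ns2.put("nn3", new InetSocketAddress("nn3.example.com", 8020));
        ns2.put("nn4", new InetSocketAddress("nn4.example.com", 8020));
        newAddressMap.put("ns2", ns2);

        // One BPOfferService is created per nameservice (outer key), and one
        // BPServiceActor per namenode address (inner values).
        System.out.println(newAddressMap);
    }
}
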
------------------------------------------------
    protected BPOfferService createBPOS(List<InetSocketAddress> nnAddrs) {
        return new BPOfferService(nnAddrs, dn);
    }
    BPOfferService(List<InetSocketAddress> nnAddrs, DataNode dn) {
        Preconditions.checkArgument(!nnAddrs.isEmpty(),
                "Must pass at least one NN.");
        this.dn = dn;
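        // One BPServiceActor is created per configured namenode address
        // (e.g. the active and the standby namenode of an HA nameservice)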
        for (InetSocketAddress addr : nnAddrs) {
            this.bpServices.add(new BPServiceActor(addr, this));
        }
    }
--------------------------------------------
    // BlockPoolManager#startAll() starts all of the BPOfferServices
    // (in effect, all of their BPServiceActors).
    synchronized void startAll() throws IOException {
        try {
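            // Run as the datanode's login user (e.g. the Kerberos principal
            // established by SecurityUtil.login() in instantiateDataNode())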
            UserGroupInformation.getLoginUser().doAs(
                    new PrivilegedExceptionAction<Object>() {
                        @Override
                        public Object run() throws Exception {
                            for (BPOfferService bpos : offerServices) {
                                bpos.start();
                            }
                            return null;
                        }
                    });
        } catch (InterruptedException ex) {
            IOException ioe = new IOException();
            ioe.initCause(ex.getCause());
            throw ioe;
        }
    }
-------------------------------------------------------
    // The main datanode startup flow launches several kinds of worker threads, including the
    // InfoServer, the JvmPauseMonitor and the BPServiceActor threads. The most important of
    // these is BPServiceActor: it is the thread that actually talks to the namenode on the
    // datanode's behalf.
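
To make that role concrete, here is a heavily simplified, hypothetical sketch of the actor-per-namenode pattern (register once, then heartbeat in a loop and act on whatever the namenode returns). The class and method names below are made up for illustration; the real BPServiceActor speaks to the namenode over DatanodeProtocol and additionally handles block reports, incremental block reports, re-registration and more:

import java.net.InetSocketAddress;

// Hypothetical sketch of a per-namenode worker thread; not the real BPServiceActor.
class NamenodeActor implements Runnable {
    private final InetSocketAddress nnAddr;
    private volatile boolean running = true;

    NamenodeActor(InetSocketAddress nnAddr) {
        this.nnAddr = nnAddr;
    }

    void start() {
        new Thread(this, "NamenodeActor-" + nnAddr).start();
    }

    void stop() {
        running = false;
    }

    @Override
    public void run() {
        register();                            // handshake + registration with this namenode
        while (running) {
            sendHeartbeatAndProcessCommands(); // periodic heartbeat; act on returned commands
            sleepQuietly(3000);                // dfs.heartbeat.interval defaults to 3 seconds
        }
    }

    private void register() { /* omitted in this sketch */ }

    private void sendHeartbeatAndProcessCommands() { /* omitted in this sketch */ }

    private static void sleepQuietly(long ms) {
        try {
            Thread.sleep(ms);
        } catch (InterruptedException e) {
            Thread.currentThread().interrupt();
        }
    }
}
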
    // DataNode#initBlockPool():
    /**
     * One of the Block Pools has successfully connected to its NN.
     * This initializes the local storage for that block pool,
     * checks consistency of the NN's cluster ID, etc.
     *
     * If this is the first block pool to register, this also initializes
     * the datanode-scoped storage.
     *
     * @param bpos Block pool offer service
     * @throws IOException if the NN is inconsistent with the local storage.
     */
    void initBlockPool(BPOfferService bpos) throws IOException {
... // omitted
        // Register the block pool with the BlockPoolManager
        blockPoolManager.addBlockPool(bpos);
        // Initial setup of the storage structures
        initStorage(nsInfo);
...     // check for disk failures
        // Start the periodic scanners
        initPeriodicScanners(conf);
        // Add the block pool to FsDatasetImpl and continue initializing the storage structures
        data.addBlockPool(nsInfo.getBlockPoolID(), conf);
    }
