【Flink提交流程源码】四、YarnJobClusterEntrypoint创建ResourceManager、Dispatcher

一、YarnJobClusterEntrypoint

进入YarnJobClusterEntrypoint类

main方法

// Body of YarnJobClusterEntrypoint.main: builds the configuration from the YARN
// container environment and the command-line arguments, then runs the entrypoint.
// Register the SignalHandler and install the JvmShutdownSafeguard as a shutdown hook, both using LOG.
SignalHandler.register(LOG);
		JvmShutdownSafeguard.installAsShutdownHook(LOG);

		Map<String, String> env = System.getenv();

		// The working directory is taken from the PWD environment variable; it must be set.
		final String workingDirectory = env.get(ApplicationConstants.Environment.PWD.key());
		Preconditions.checkArgument(
			workingDirectory != null,
			"Working directory variable (%s) not set",
			ApplicationConstants.Environment.PWD.key());

		// Logging the YARN environment is best-effort: an IOException is only reported as a warning.
		try {
			YarnEntrypointUtils.logYarnEnvironmentInformation(env, LOG);
		} catch (IOException e) {
			LOG.warn("Could not log YARN environment information.", e);
		}

		// Parse the dynamic parameters from args, then load the configuration
		// from the working directory, the dynamic parameters and the environment.
		final Configuration dynamicParameters = ClusterEntrypointUtils.parseParametersOrExit(
			args,
			new DynamicParametersConfigurationParserFactory(),
			YarnJobClusterEntrypoint.class);
		final Configuration configuration = YarnEntrypointUtils.loadConfiguration(workingDirectory, dynamicParameters, env);

		YarnJobClusterEntrypoint yarnJobClusterEntrypoint = new YarnJobClusterEntrypoint(configuration);
          // Entry point of the program: hand the entrypoint over to ClusterEntrypoint to run it
		ClusterEntrypoint.runClusterEntrypoint(yarnJobClusterEntrypoint);

  

ClusterEntrypoint.runClusterEntrypoint(yarnJobClusterEntrypoint);
进入到
ClusterEntrypoint类
clusterEntrypoint.startCluster();

securityContext.runSecured((Callable<Void>) () -> {
runCluster(configuration, pluginManager);

return null;
});

进入runCluster
// Body of runCluster: all of the setup below runs while holding `lock`.
synchronized (lock) {
  // Initialize the services (RPC-related)
initializeServices(configuration, pluginManager);

// write host information into configuration
configuration.setString(JobManagerOptions.ADDRESS, commonRpcService.getAddress());
configuration.setInteger(JobManagerOptions.PORT, commonRpcService.getPort());

final DispatcherResourceManagerComponentFactory dispatcherResourceManagerComponentFactory = createDispatcherResourceManagerComponentFactory(configuration);

// Create the ResourceManager, create and start the Dispatcher, start the ResourceManager
clusterComponent = dispatcherResourceManagerComponentFactory.create(
configuration,
ioExecutor,
commonRpcService,
haServices,
blobServer,
heartbeatServices,
metricRegistry,
archivedExecutionGraphStore,
new RpcMetricQueryServiceRetriever(metricRegistry.getMetricQueryServiceRpcService()),
this);

// React to the component's shut-down future: a failure shuts the cluster down with
// status UNKNOWN and the stringified exception; otherwise the reported status is used.
clusterComponent.getShutDownFuture().whenComplete(
(ApplicationStatus applicationStatus, Throwable throwable) -> {
if (throwable != null) {
shutDownAsync(
ApplicationStatus.UNKNOWN,
ExceptionUtils.stringifyException(throwable),
false);
} else {
// This is the general shutdown path. If a separate more specific shutdown was
// already triggered, this will do nothing
shutDownAsync(
applicationStatus,
null,
true);
}
});
}

进入

DispatcherResourceManagerComponentFactory类
dispatcherResourceManagerComponentFactory.create(

创建ResourceManager、Dispatcher,并启动

clusterComponent = dispatcherResourceManagerComponentFactory.create(
				configuration,
				ioExecutor,
				commonRpcService,
				haServices,
				blobServer,
				heartbeatServices,
				metricRegistry,
				archivedExecutionGraphStore,
				new RpcMetricQueryServiceRetriever(metricRegistry.getMetricQueryServiceRpcService()),
				this); 

找到它具体实现类

DefaultDispatcherResourceManagerComponentFactory

 

webMonitorEndpoint = restEndpointFactory.createRestEndpoint(
				configuration,
				dispatcherGatewayRetriever,
				resourceManagerGatewayRetriever,
				blobServer,
				executor,
				metricFetcher,
				highAvailabilityServices.getClusterRestEndpointLeaderElectionService(),
				fatalErrorHandler);

			log.debug("Starting Dispatcher REST endpoint.");
			webMonitorEndpoint.start();

ResourceManager的创建(其启动在创建、启动Dispatcher之后)

resourceManager = resourceManagerFactory.createResourceManager(
				configuration,
				ResourceID.generate(),
				rpcService,
				highAvailabilityServices,
				heartbeatServices,
				fatalErrorHandler,
				new ClusterInformation(hostname, blobServer.getPort()),
				webMonitorEndpoint.getRestBaseUrl(),
				metricRegistry,
				hostname,
				ioExecutor);

  创建、启动Dispatcher

dispatcherRunner = dispatcherRunnerFactory.createDispatcherRunner(
				highAvailabilityServices.getDispatcherLeaderElectionService(),
				fatalErrorHandler,
				new HaServicesJobGraphStoreFactory(highAvailabilityServices),
				ioExecutor,
				rpcService,
				partialDispatcherServices);

  

进入

DispatcherRunnerFactory
/**
 * Factory method declared by DispatcherRunnerFactory: creates a {@link DispatcherRunner}
 * from the given services.
 *
 * @param leaderElectionService leader election service for the dispatcher
 * @param fatalErrorHandler handler for fatal errors
 * @param jobGraphStoreFactory factory for the job graph store
 * @param ioExecutor executor passed in by the caller for I/O work
 * @param rpcService RPC service passed in by the caller
 * @param partialDispatcherServices partial set of services for the dispatcher
 * @return the created dispatcher runner
 * @throws Exception if the runner cannot be created
 */
DispatcherRunner createDispatcherRunner(
		LeaderElectionService leaderElectionService,
		FatalErrorHandler fatalErrorHandler,
		JobGraphStoreFactory jobGraphStoreFactory,
		Executor ioExecutor,
		RpcService rpcService,
		PartialDispatcherServices partialDispatcherServices) throws Exception;

进入

DefaultDispatcherRunnerFactory
DefaultDispatcherRunner.create(
			leaderElectionService,
			fatalErrorHandler,
			dispatcherLeaderProcessFactory); 

进入

DefaultDispatcherRunner

final DefaultDispatcherRunner dispatcherRunner = new DefaultDispatcherRunner(
			leaderElectionService,
			fatalErrorHandler,
			dispatcherLeaderProcessFactory);
		return DispatcherRunnerLeaderElectionLifecycleManager.createFor(dispatcherRunner, leaderElectionService);
进入
DispatcherRunnerLeaderElectionLifecycleManager
return new DispatcherRunnerLeaderElectionLifecycleManager<>(dispatcherRunner, leaderElectionService);

leaderElectionService.start(dispatcherRunner);

  

找到
实现类StandaloneLeaderElectionService
contender = Preconditions.checkNotNull(newContender);

		// directly grant leadership to the given contender 选举服务:每个组件都有选举服务,最终要调用这个
 contender.grantLeadership(HighAvailabilityServices.DEFAULT_LEADER_ID);

  

找到
实现类DefaultDispatcherRunner
/**
 * Leadership callback. Passes {@code runActionIfRunning} an action that calls
 * {@code startNewDispatcherLeaderProcess} with the given session ID.
 *
 * @param leaderSessionID leader session ID handed to the new dispatcher leader process
 */
public void grantLeadership(UUID leaderSessionID) {
		// NOTE(review): presumably the action is skipped when the runner is no longer running — confirm in runActionIfRunning.
		runActionIfRunning(() -> startNewDispatcherLeaderProcess(leaderSessionID));
	}

dispatcherLeaderProcess = createNewDispatcherLeaderProcess(leaderSessionID);

final DispatcherLeaderProcess newDispatcherLeaderProcess = dispatcherLeaderProcess;
FutureUtils.assertNoException(
previousDispatcherLeaderProcessTerminationFuture.thenRun(newDispatcherLeaderProcess::start));

  

找到

实现类AbstractDispatcherLeaderProcess

/**
 * Starts this leader process by calling {@code runIfStateIs} with {@code State.CREATED}
 * and {@code startInternal} as the action.
 */
public final void start() {
// NOTE(review): presumably startInternal only runs while the state is still CREATED — confirm in runIfStateIs.
runIfStateIs(
State.CREATED,
this::startInternal);
}

/**
 * Logs the start (using the simple class name), sets the state to RUNNING and
 * then invokes the {@code onStart()} hook.
 */
private void startInternal() {
log.info("Start {}.", getClass().getSimpleName());
// The state is switched to RUNNING before the hook is called.
state = State.RUNNING;
onStart();
}

  

找到子类

JobDispatcherLeaderProcess onStart  
/**
 * Start hook of JobDispatcherLeaderProcess: creates the dispatcher gateway service via
 * {@code dispatcherGatewayServiceFactory} and passes it to {@code completeDispatcherSetup}.
 */
protected void onStart() {
		// Arguments: a DispatcherId derived from the leader session ID, a singleton
		// collection holding jobGraph, and ThrowingJobGraphWriter.INSTANCE as the writer.
		final DispatcherGatewayService dispatcherService = dispatcherGatewayServiceFactory.create(
			DispatcherId.fromUuid(getLeaderSessionId()),
			Collections.singleton(jobGraph),
			ThrowingJobGraphWriter.INSTANCE);

		completeDispatcherSetup(dispatcherService);
	}

  进入

DefaultDispatcherGatewayServiceFactory类

public AbstractDispatcherLeaderProcess.DispatcherGatewayService create(
			DispatcherId fencingToken,
			Collection<JobGraph> recoveredJobs,
			JobGraphWriter jobGraphWriter) {

		final Dispatcher dispatcher;
		try {
			dispatcher = dispatcherFactory.createDispatcher(
				rpcService,
				fencingToken,
				recoveredJobs,
				(dispatcherGateway, scheduledExecutor, errorHandler) -> new NoOpDispatcherBootstrap(),
				PartialDispatcherServicesWithJobGraphStore.from(partialDispatcherServices, jobGraphWriter));
		} catch (Exception e) {
			throw new FlinkRuntimeException("Could not create the Dispatcher rpc endpoint.", e);
		}

//rpc调用 dispatcher.start(); return DefaultDispatcherGatewayService.from(dispatcher);

  

最终通过rpc调用(基于akka的组件通信)触发onStart方法
/**
 * Starts this endpoint by delegating to {@code rpcServer.start()}.
 */
public final void start() {
rpcServer.start();
}

 dispatcher.start()最终会调用Dispatcher类的onStart方法,具体实现见下一节

 

posted @ 2021-06-18 18:39  持枢  阅读(471)  评论(0)    收藏  举报