An Introduction to Spark on YARN's yarn-cluster Mode
At line 27 of the spark-submit script (quoted here from a Spark 2.x release; the exact line number shifts slightly between versions), you can see the following:
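exec "${SPARK_HOME}"/bin/spark-class org.apache.spark.deploy.SparkSubmit "$@"

The script does nothing else of substance: it execs spark-class with org.apache.spark.deploy.SparkSubmit as the entry class and forwards every command-line argument via "$@". For example, a yarn-cluster submission like the one below (the class and jar names are made up for illustration) passes all of these flags straight through to SparkSubmit:

spark-submit \
  --master yarn \
  --deploy-mode cluster \
  --class com.example.WordCount \
  /path/to/wordcount.jar /input /output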

$SPARK_HOME/bin/spark-class can be ignored here (it mainly builds the JVM command used to launch the application). In other words, all of the arguments end up in the SparkSubmit class, which carries out the actual job submission, as shown below:
object SparkSubmit extends CommandLineUtils with Logging {

  // Cluster managers
  private val YARN = 1
  private val STANDALONE = 2
  private val MESOS = 4
  private val LOCAL = 8
  private val KUBERNETES = 16
  private val ALL_CLUSTER_MGRS = YARN | STANDALONE | MESOS | LOCAL | KUBERNETES

  // Deploy modes
  private val CLIENT = 1
  private val CLUSTER = 2
  private val ALL_DEPLOY_MODES = CLIENT | CLUSTER

  // Special primary resource names that represent shells rather than application jars.
  private val SPARK_SHELL = "spark-shell"
  private val PYSPARK_SHELL = "pyspark-shell"
  private val SPARKR_SHELL = "sparkr-shell"
  private val SPARKR_PACKAGE_ARCHIVE = "sparkr.zip"
  private val R_PACKAGE_ARCHIVE = "rpkg.zip"

  private val CLASS_NOT_FOUND_EXIT_STATUS = 101

  // Following constants are visible for testing.
  private[deploy] val YARN_CLUSTER_SUBMIT_CLASS =
    "org.apache.spark.deploy.yarn.YarnClusterApplication"
  private[deploy] val REST_CLUSTER_SUBMIT_CLASS = classOf[RestSubmissionClientApp].getName()
  private[deploy] val STANDALONE_CLUSTER_SUBMIT_CLASS = classOf[ClientApp].getName()
  private[deploy] val KUBERNETES_CLUSTER_SUBMIT_CLASS =
    "org.apache.spark.deploy.k8s.submit.KubernetesClientApplication"

  override def main(args: Array[String]): Unit = {
    val submit = new SparkSubmit() {
      self =>

      override protected def parseArguments(args: Array[String]): SparkSubmitArguments = {
        new SparkSubmitArguments(args) {
          override protected def logInfo(msg: => String): Unit = self.logInfo(msg)

          override protected def logWarning(msg: => String): Unit = self.logWarning(msg)
        }
      }

      override protected def logInfo(msg: => String): Unit = printMessage(msg)

      override protected def logWarning(msg: => String): Unit = printMessage(s"Warning: $msg")

      override def doSubmit(args: Array[String]): Unit = {
        try {
          super.doSubmit(args)
        } catch {
          case e: SparkUserAppException =>
            exitFn(e.exitCode)
        }
      }
    }

    submit.doSubmit(args)
  }

  /**
   * Return whether the given primary resource represents a user jar.
   */
  private[deploy] def isUserJar(res: String): Boolean = {
    !isShell(res) && !isPython(res) && !isInternal(res) && !isR(res)
  }

  /**
   * Return whether the given primary resource represents a shell.
   */
  private[deploy] def isShell(res: String): Boolean = {
    (res == SPARK_SHELL || res == PYSPARK_SHELL || res == SPARKR_SHELL)
  }

  /**
   * Return whether the given main class represents a sql shell.
   */
  private[deploy] def isSqlShell(mainClass: String): Boolean = {
    mainClass == "org.apache.spark.sql.hive.thriftserver.SparkSQLCLIDriver"
  }

  /**
   * Return whether the given main class represents a thrift server.
   */
  private def isThriftServer(mainClass: String): Boolean = {
    mainClass == "org.apache.spark.sql.hive.thriftserver.HiveThriftServer2"
  }

  /**
   * Return whether the given primary resource requires running python.
   */
  private[deploy] def isPython(res: String): Boolean = {
    res != null && res.endsWith(".py") || res == PYSPARK_SHELL
  }

  /**
   * Return whether the given primary resource requires running R.
   */
  private[deploy] def isR(res: String): Boolean = {
    res != null && res.endsWith(".R") || res == SPARKR_SHELL
  }

  private[deploy] def isInternal(res: String): Boolean = {
    res == SparkLauncher.NO_RESOURCE
  }
}
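Once the arguments are parsed, SparkSubmit.prepareSubmitEnvironment (not quoted above) decides which class actually gets launched. In yarn-cluster mode that class is YarnClusterApplication (the YARN_CLUSTER_SUBMIT_CLASS constant above), not the user's own main class, which is forwarded as a --class argument instead. A condensed sketch of that branch for a plain jar submission, paraphrased from the Spark 2.4 source rather than quoted verbatim:

// Inside SparkSubmit.prepareSubmitEnvironment (condensed paraphrase, jar case only):
if (isYarnCluster) {
  // Launch org.apache.spark.deploy.yarn.YarnClusterApplication instead of the user class.
  childMainClass = YARN_CLUSTER_SUBMIT_CLASS
  if (args.primaryResource != SparkLauncher.NO_RESOURCE) {
    childArgs += ("--jar", args.primaryResource)
  }
  childArgs += ("--class", args.mainClass)
  if (args.childArgs != null) {
    args.childArgs.foreach { arg => childArgs += ("--arg", arg) }
  }
}

YarnClusterApplication then submits the application to YARN, and the user's class is run inside the ApplicationMaster container on the cluster, which is what distinguishes yarn-cluster mode from yarn-client mode.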
