1. Introduction
SparkConf is the configuration object of a Spark application; it carries the various submission parameters that are later loaded by the SparkContext.
Internally it maintains a ConcurrentHashMap that stores the Spark submission parameters as key-value pairs.
Once a SparkConf object is passed to Spark, it is cloned via the clone method, so the configuration can no longer be modified at runtime.
Parameters set this way have the highest priority: SparkConf.set > spark-submit --xxx.
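A minimal sketch of this precedence, assuming an externally supplied value is mimicked through a spark.* JVM system property (spark.executor.memory is used only for illustration): a value set explicitly on the SparkConf overrides the same key loaded by new SparkConf().
import org.apache.spark.SparkConf

object spark_SparkConf_precedence {
  def main(args: Array[String]): Unit = {
    // Mimic a value supplied from outside the program as a spark.* JVM system property
    System.setProperty("spark.executor.memory", "1g")

    val conf = new SparkConf()              // loads the spark.* system properties
    conf.set("spark.executor.memory", "2g") // an explicit set overwrites the loaded value

    println(conf.get("spark.executor.memory"))
    // 2g
  }
}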
2. Constructors
import org.apache.spark.SparkConf

object spark_SparkConf_constructor {
  def main(args: Array[String]): Unit = {
    /*
     * TODO Primary constructor (with parameter)
     *      class SparkConf(loadDefaults: Boolean)
     *
     * TODO No-arg constructor
     *      def this() = this(true)
     *
     * TODO Effect of the loadDefaults parameter
     *      true  : load the spark.* JVM system properties
     *      false : do not load the spark.* JVM system properties
     */
    // Set JVM system properties
    System.setProperty("spark.key", "value1")
    System.setProperty("hadoop.key", "value2")

    // TODO No-arg constructor, loadDefaults = true
    val sc1: SparkConf = new SparkConf()
    println(s"spark.key:${sc1.get("spark.key", "not found")}")
    println(s"hadoop.key:${sc1.get("hadoop.key", "not found")}")
    // spark.key: value1
    // hadoop.key: not found

    // TODO Constructor with loadDefaults = false
    val sc2: SparkConf = new SparkConf(false)
    println(s"spark.key:${sc2.get("spark.key", "not found")}")
    println(s"hadoop.key:${sc2.get("hadoop.key", "not found")}")
    // spark.key: not found
    // hadoop.key: not found
  }
}
3. Setting Spark parameters
import org.apache.spark.SparkConf

object spark_SparkConf_set {
  def main(args: Array[String]): Unit = {
    /*
     * TODO Two ways to set SparkConf parameters
     *      1. Load the spark.* JVM system properties
     *         new SparkConf() | new SparkConf(true)
     *      2. Use the provided set methods
     */
    val sc: SparkConf = new SparkConf
    sc.set("user", "张飞")
    sc.setMaster("local")
    sc.setAppName("Test SparkConf")

    // Get all parameters
    val arr: Array[(String, String)] = sc.getAll
    arr.foreach(println(_))
    // (spark.master, local)
    // (user, 张飞)
    // (spark.app.name, Test SparkConf)
  }
}
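Because every setter returns the SparkConf itself (see the source reading below), the same configuration can also be written as one chained expression; a minimal sketch, with spark.ui.enabled only as an illustrative extra key:
import org.apache.spark.SparkConf

object spark_SparkConf_chaining {
  def main(args: Array[String]): Unit = {
    // Every setter returns `this`, so the calls can be chained
    val conf: SparkConf = new SparkConf()
      .setMaster("local[*]")
      .setAppName("Chained SparkConf")
      .set("spark.ui.enabled", "false")

    conf.getAll.foreach(println(_))
  }
}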
4. Getting Spark parameters
import org.apache.spark.SparkConf

object spark_SparkConf_get {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf
    conf.setAppName("test SparkConf")
    conf.setMaster("local")

    // TODO Get a value by key; throws an exception if the key is not set
    // conf.get("name")
    // getOption(key).getOrElse(throw new NoSuchElementException(key))

    // TODO Get a value by key, falling back to a default value if the key is not set
    val value = conf.get("name", "defaultValue")
    println(value)
    // defaultValue

    // TODO Get all key-value pairs
    val arr: Array[(String, String)] = conf.getAll
    arr.foreach(println(_))
    // (spark.master, local)
    // (spark.app.name, test SparkConf)
  }
}
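SparkConf also exposes getOption and contains, which avoid the NoSuchElementException thrown by get; a minimal sketch:
import org.apache.spark.SparkConf

object spark_SparkConf_getOption {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf(false).setAppName("test SparkConf")

    // getOption returns an Option[String] instead of throwing
    println(conf.getOption("spark.app.name")) // Some(test SparkConf)
    println(conf.getOption("spark.master"))   // None

    // contains checks whether a key has been set
    println(conf.contains("spark.master"))    // false
  }
}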
5. Viewing Spark parameters
The YARN-related configuration entries are defined in org.apache.spark.deploy.yarn.config.
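To inspect the parameters a SparkConf actually holds at runtime, getAll (used above) and toDebugString can be printed; a minimal sketch:
import org.apache.spark.SparkConf

object spark_SparkConf_inspect {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf()
      .setMaster("local")
      .setAppName("Inspect SparkConf")

    // toDebugString prints one key=value entry per line
    println(conf.toDebugString)
  }
}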
6. Source reading
/********* TODO SparkContext initialization, source analysis (based on Spark 3.0.0) *********/
org.apache.spark.SparkConf
/**
 * Configuration for a Spark application. Used to set various Spark parameters as key-value pairs.
 * => Configures a Spark application; the various Spark parameters are stored as key-value pairs.
 *
 * Most of the time, you would create a SparkConf object with `new SparkConf()`, which will load
 * values from any `spark.*` Java system properties set in your application as well. In this case,
 * parameters you set directly on the `SparkConf` object take priority over system properties.
 * => Most of the time the object is created with new SparkConf(), which also loads the spark.* JVM
 *    properties; values set directly on the SparkConf take priority over those system properties.
 *
 * All setter methods in this class support chaining. For example, you can write
 * `new SparkConf().setMaster("local").setAppName("My app")`.
 * => Every setter returns the object itself, so you can write
 *    new SparkConf().setMaster("local").setAppName("My app").
 *
 * @param loadDefaults whether to also load values from Java system properties
 * => true: load the spark.* JVM system properties; false: do not load them
 *
 * @note Once a SparkConf object is passed to Spark, it is cloned and can no longer be modified
 * by the user. Spark does not support modifying the configuration at runtime.
 * => Once a SparkConf is passed to Spark it is cloned via clone, so the configuration cannot be
 *    modified at runtime.
 */
class SparkConf(loadDefaults: Boolean) extends Cloneable with Logging with Serializable {

  import SparkConf._

  /* TODO No-arg constructor: by default loads the spark.* JVM system properties */
  def this() = this(true)

  /* TODO Member field: a ConcurrentHashMap[String, String] that stores all configuration entries */
  private val settings = new ConcurrentHashMap[String, String]()

  /* TODO loadDefaults decides whether the spark.* JVM system properties are loaded */
  if (loadDefaults) {
    loadFromSystemProperties(false)
  }

  /* TODO Load the spark.* JVM system properties */
  private[spark] def loadFromSystemProperties(silent: Boolean): SparkConf = {
    // Load any spark.* system properties
    for ((key, value) <- Utils.getSystemProperties if key.startsWith("spark.")) {
      set(key, value, silent)
    }
    this
  }
  /** Set a configuration variable. */
  def set(key: String, value: String): SparkConf = {
    set(key, value, false)
  }

  private[spark] def set(key: String, value: String, silent: Boolean): SparkConf = {
    if (key == null) {
      throw new NullPointerException("null key")
    }
    if (value == null) {
      throw new NullPointerException("null value for " + key)
    }
    if (!silent) {
      logDeprecationWarning(key)
    }
    settings.put(key, value)
    this
  }

  /**
   * The master URL to connect to, such as "local" to run locally with one thread, "local[4]" to
   * run locally with 4 cores, or "spark://master:7077" to run on a Spark standalone cluster.
   */
  def setMaster(master: String): SparkConf = {
    set("spark.master", master)
  }

  /** Set a name for your application. Shown in the Spark web UI. */
  def setAppName(name: String): SparkConf = {
    set("spark.app.name", name)
  }

  /** Set JAR files to distribute to the cluster. */
  def setJars(jars: Seq[String]): SparkConf = {
    for (jar <- jars if (jar == null)) logWarning("null jar passed to SparkContext constructor")
    set(JARS, jars.filter(_ != null))
  }

  /** Set JAR files to distribute to the cluster. (Java-friendly version.) */
  def setJars(jars: Array[String]): SparkConf = {
    setJars(jars.toSeq)
  }

  /** Remove a parameter from the configuration */
  def remove(key: String): SparkConf = {
    settings.remove(key)
    this
  }

  private[spark] def remove(entry: ConfigEntry[_]): SparkConf = {
    remove(entry.key)
  }

  /** Get a parameter; throws a NoSuchElementException if it's not set */
  def get(key: String): String = {
    getOption(key).getOrElse(throw new NoSuchElementException(key))
  }

  /** Get a parameter, falling back to a default if not set */
  def get(key: String, defaultValue: String): String = {
    getOption(key).getOrElse(defaultValue)
  }

  /** Get all parameters as a list of pairs */
  def getAll: Array[(String, String)] = {
    settings.entrySet().asScala.map(x => (x.getKey, x.getValue)).toArray
  }

  /** Copy this object */
  override def clone: SparkConf = {
    val cloned = new SparkConf(false)
    settings.entrySet().asScala.foreach { e =>
      cloned.set(e.getKey(), e.getValue(), true)
    }
    cloned
  }
}
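As the @note above describes, Spark works on a clone of the SparkConf; a minimal sketch of that clone semantics, calling the public clone method directly without starting a SparkContext:
import org.apache.spark.SparkConf

object spark_SparkConf_clone {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf(false).set("user", "张飞")

    // clone copies the current settings into a new, independent SparkConf
    val cloned: SparkConf = conf.clone

    // Later changes to the original do not affect the clone, which is why
    // runtime modification has no effect once Spark holds its own copy
    conf.set("user", "关羽")
    println(cloned.get("user")) // 张飞
    println(conf.get("user"))   // 关羽
  }
}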