import org.apache.spark.rdd.RDD
import org.apache.spark.{SparkConf, SparkContext}

import scala.collection.mutable
object Spark05_Bc {

    def main(args: Array[String]): Unit = {

        val sparkConf = new SparkConf().setMaster("local").setAppName("Bc")
        val sc = new SparkContext(sparkConf)
        val rdd1 = sc.makeRDD(List(
            ("a", 1), ("b", 2), ("c", 3)
        ))
        //val rdd2 = sc.makeRDD(List(
        //    ("a", 4), ("b", 5), ("c", 6)
        //))
        val map = mutable.Map(("a", 4), ("b", 5), ("c", 6))
        // join would grow the data volume geometrically and degrade shuffle
        // performance, so it is not recommended here.
        //val joinRDD: RDD[(String, (Int, Int))] = rdd1.join(rdd2)
        //joinRDD.collect().foreach(println)

        // Input:  (a, 1), (b, 2), (c, 3)
        // Output: (a, (1,4)), (b, (2,5)), (c, (3,6))
        // Map-side join: look each key up in the driver-side map captured by
        // the closure instead of joining two RDDs, avoiding a shuffle entirely.
        rdd1.map {
            case (w, c) =>
                val l: Int = map.getOrElse(w, 0)
                (w, (c, l))
        }.collect().foreach(println)
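
        // A minimal sketch of the same lookup done with a broadcast variable,
        // which is what the Bc in this file's name refers to. sc.broadcast
        // ships the map to each executor once instead of serializing it into
        // every task's closure; the name bcMap below is illustrative.
        val bcMap = sc.broadcast(map)
        rdd1.map {
            case (w, c) => (w, (c, bcMap.value.getOrElse(w, 0)))
        }.collect().foreach(println)
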
        sc.stop()
    }
}