2021寒假(17)

import org.apache.spark.rdd.RDD
import org.apache.spark.{SparkConf, SparkContext}

import scala.collection.mutable

object Spark05_Bc {

    def main(args: Array[String]): Unit = {

        val sparConf = new SparkConf().setMaster("local").setAppName("Acc")
        val sc = new SparkContext(sparConf)

        val rdd1 = sc.makeRDD(List(
            ("a", 1),("b", 2),("c", 3)
        ))
//        val rdd2 = sc.makeRDD(List(
//            ("a", 4),("b", 5),("c", 6)
//        ))
        val map = mutable.Map(("a", 4),("b", 5),("c", 6))



        // join会导致数据量几何增长,并且会影响shuffle的性能,不推荐使用
        //val joinRDD: RDD[(String, (Int, Int))] = rdd1.join(rdd2)
        //joinRDD.collect().foreach(println)
        // (a, 1),    (b, 2),    (c, 3)
        // (a, (1,4)),(b, (2,5)),(c, (3,6))
        rdd1.map {
            case (w, c) => {
                val l: Int = map.getOrElse(w, 0)
                (w, (c, l))
            }
        }.collect().foreach(println)



        sc.stop()

    }
}

 

posted @ 2021-01-18 00:18  祈欢  阅读(1)  评论(0编辑  收藏  举报