Spark 广播变量 2（实际使用）

在代码中增加广播变量：
val broadcase = sc.broadcast(stu_info_list)

package com.matthew.bigdata.spark.core

import org.apache.spark.sql.SparkSession

/**
 * Demonstrates a Spark broadcast variable.
 *
 * A small driver-side list of student records `(id, name, age)` is broadcast,
 * and every score record `(studentId, subject, score)` in the RDD is rewritten
 * to `(studentName, subject, score)` by looking the student up in the
 * broadcast value. The results are collected to the driver and printed.
 */
object Demo15 {

  /**
   * Entry point: builds a local SparkSession, runs the job and prints the result.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    // NOTE(review): the app name is "demo14" although this object is Demo15 —
    // looks like a copy-paste leftover; confirm before renaming.
    val spark = SparkSession.builder().appName("demo14").master("local").getOrCreate()
    try {
      val sc = spark.sparkContext

      // Student records: (id, name, age).
      val stu_info_list = List((1, "张大仙", 11), (2, "王晓霞", 13), (3, "张甜甜", 11), (4, "王大力", 11))
      // Score records: (student id, subject, score).
      val score_info_rdd = sc.parallelize(List(
        (1, "语文", 99), (2, "数学", 99), (3, "英语", 99), (4, "编程", 99), (1, "语文", 99), (2, "编程", 88),
        (3, "语文", 77), (4, "英语", 73), (1, "语文", 77), (3, "英语", 66), (2, "编程", 99)))

      // Wrap the student list in a broadcast variable; tasks read it through `.value`.
      val broadcase = sc.broadcast(stu_info_list)

      /**
       * Replaces the student id of a score record with the student's name.
       *
       * @param data a score record `(studentId, subject, score)`
       * @return `(studentName, subject, score)`; the name is `""` when no
       *         student with that id exists in the broadcast list
       */
      def map_func(data: (Int, String, Int)): (String, String, Int) = {
        val (id, subject, score) = data
        // `lastOption` means the last matching student wins if an id is repeated.
        // For a large lookup table, broadcasting a Map keyed by id would avoid
        // this linear scan per record.
        val name = broadcase.value
          .collect { case (stuId, stuName, _) if stuId == id => stuName }
          .lastOption
          .getOrElse("")
        (name, subject, score)
      }

      score_info_rdd.map(map_func).collect().foreach(println)
    } finally {
      // Always release the session (and its SparkContext), even if the job fails.
      spark.stop()
    }
  }
}

  感谢传智播客提供的 PySpark 课程。本文给出的是 Scala 语言版本的代码，供大家参考。课程地址：黑马程序员Spark全套视频教程,4天spark3.2快速入门到精通,全网首套基于Python语言的spark教程_哔哩哔哩_bilibili

posted @ 2022-07-21 15:12  孤独的执行者  阅读(41)  评论(0)    收藏  举报