join
import org.apache.spark.rdd.RDD
import org.apache.spark.{SparkConf, SparkContext}

object joinRDD {
  def main(args: Array[String]): Unit = {
    val conf: SparkConf = new SparkConf().setAppName("My scala word count").setMaster("local")
    val sc = new SparkContext(conf)

    // join: pair up elements of two pair RDDs that share the same key
    val rdd: RDD[(Int, String)] = sc.makeRDD(List((3, "a"), (2, "a"), (1, "c")), 3)
    val rdd2: RDD[(Int, Int)] = sc.makeRDD(Array((1, 3), (2, 6), (3, 9)))

    val joinRDD: RDD[(Int, (String, Int))] = rdd.join(rdd2)
    joinRDD.collect().foreach(println)
    // (3,(a,9))
    // (1,(c,3))
    // (2,(a,6))

    // join is an inner join: a key present in only one RDD is dropped,
    // so (4,"e") does not appear in the result because rdd2 has no key 4
    val rdd3: RDD[(Int, String)] = sc.makeRDD(List((3, "a"), (2, "a"), (1, "c"), (4, "e")), 3)
    val joinRDD3: RDD[(Int, (String, Int))] = rdd3.join(rdd2)
    joinRDD3.collect().foreach(println)
    // (3,(a,9))
    // (1,(c,3))
    // (2,(a,6))

    sc.stop()
  }
}
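If the unmatched key (4,"e") should be kept rather than dropped, Spark's leftOuterJoin keeps every key from the left RDD and wraps the right-hand value in an Option. A minimal sketch, continuing inside the same main method and reusing rdd3 and rdd2 from above (the variable name leftRDD is just illustrative):

    // leftOuterJoin: every left-side key survives; a missing right-hand
    // value shows up as None instead of the pair being discarded
    val leftRDD: RDD[(Int, (String, Option[Int]))] = rdd3.leftOuterJoin(rdd2)
    leftRDD.collect().foreach(println)
    // output order may vary:
    // (4,(e,None))
    // (3,(a,Some(9)))
    // (1,(c,Some(3)))
    // (2,(a,Some(6)))

rightOuterJoin and fullOuterJoin follow the same pattern, wrapping the left side (or both sides) in Option.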
posted on 2020-09-23 19:41 by happygril3