cogroup
cogroup, like join, associates records from two pair RDDs by key; but instead of pairing individual values, it returns all values for each key from both RDDs, grouped into two Iterables.

import org.apache.spark.rdd.RDD
import org.apache.spark.{SparkConf, SparkContext}

object cogroupRDD {
  def main(args: Array[String]): Unit = {
    val conf: SparkConf = new SparkConf().setAppName("My scala word count").setMaster("local")
    val sc = new SparkContext(conf)

    // cogroup: associate records by key, collecting each side's values into an Iterable
    val rdd: RDD[(Int, String)] = sc.makeRDD(List((3, "a"), (2, "a"), (1, "c")), 3)
    val rdd2: RDD[(Int, Int)] = sc.makeRDD(Array((1, 3), (2, 6), (3, 9)))

    val cogroupRDD: RDD[(Int, (Iterable[String], Iterable[Int]))] = rdd.cogroup(rdd2)
    cogroupRDD.collect().foreach(println)
    // (3,(CompactBuffer(a),CompactBuffer(9)))
    // (1,(CompactBuffer(c),CompactBuffer(3)))
    // (2,(CompactBuffer(a),CompactBuffer(6)))

    // A key present in only one RDD is still emitted: the other side's
    // Iterable is simply empty (an inner join would drop it instead)
    val rdd3: RDD[(Int, String)] = sc.makeRDD(List((3, "a"), (2, "a"), (1, "c"), (4, "e")), 3)
    val cogroupRDD3: RDD[(Int, (Iterable[String], Iterable[Int]))] = rdd3.cogroup(rdd2)
    cogroupRDD3.collect().foreach(println)
    // (3,(CompactBuffer(a),CompactBuffer(9)))
    // (4,(CompactBuffer(e),CompactBuffer()))
    // (1,(CompactBuffer(c),CompactBuffer(3)))
    // (2,(CompactBuffer(a),CompactBuffer(6)))
  }
}
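For contrast, here is a minimal sketch (not from the original post) showing how inner-join semantics can be expressed on top of cogroup: for each key, cross every left value with every right value via flatMapValues, so any key with an empty Iterable on either side yields nothing. The RDD contents mirror the example above; the object name joinViaCogroup is my own.

import org.apache.spark.rdd.RDD
import org.apache.spark.{SparkConf, SparkContext}

object joinViaCogroup {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("join via cogroup").setMaster("local")
    val sc = new SparkContext(conf)

    val left: RDD[(Int, String)] = sc.makeRDD(List((3, "a"), (2, "a"), (1, "c"), (4, "e")), 3)
    val right: RDD[(Int, Int)] = sc.makeRDD(Array((1, 3), (2, 6), (3, 9)))

    // Cross the two Iterables per key; a key with an empty side produces
    // no pairs at all, which is exactly inner-join behavior
    val joined: RDD[(Int, (String, Int))] = left.cogroup(right).flatMapValues {
      case (vs, ws) => for (v <- vs; w <- ws) yield (v, w)
    }
    joined.collect().foreach(println)
    // (3,(a,9))
    // (1,(c,3))
    // (2,(a,6))   <- key 4 is gone: its right-side Iterable was empty
  }
}

Note that key 4 now disappears from the output, whereas cogroup in the second example above kept it with an empty CompactBuffer.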
posted on 2020-09-23 19:52 by happygril3