1. join
/*
* 1.定义
* def join[W](other: RDD[(K, W)]): RDD[(K, (V, W))]
* def join[W](other: RDD[(K, W)], numPartitions: Int): RDD[(K, (V, W))]
* 2.功能
* 将两个 类型为(K,V)和(K,W)的RDD 进行join,返回一个相同 key 对应的所有元素连接在一起的 (K,(V,W))的 RDD
* */
/**
 * Demo of `RDD.join` — inner join of two pair RDDs.
 * Only keys present in BOTH RDDs appear in the result; here keys 6 and 7 are dropped.
 */
object joinTest {
  def main(args: Array[String]): Unit = {
    // appName fixed to match this demo (was "distinctTest", a copy-paste leftover).
    val sparkConf: SparkConf = new SparkConf().setMaster("local").setAppName("joinTest")
    val sc: SparkContext = new SparkContext(sparkConf)
    val rdd1: RDD[(Int, String)] = sc.makeRDD(List((1, "刘备"), (2, "张飞"), (3, "关羽"), (4, "曹操"), (5, "赵云"), (7, "孙权")), 2)
    val rdd2: RDD[(Int, String)] = sc.makeRDD(List((1, "蜀国"), (2, "蜀国"), (3, "蜀国"), (4, "魏国"), (5, "蜀国"), (6, "吴国")), 3)
    // Inner join: result element type is (K, (V, W)).
    val joinRdd: RDD[(Int, (String, String))] = rdd1.join(rdd2)
    joinRdd.collect().foreach(println)
    /* Sample output (ordering across partitions is not guaranteed):
       (3,(关羽,蜀国))
       (4,(曹操,魏国))
       (1,(刘备,蜀国))
       (5,(赵云,蜀国))
       (2,(张飞,蜀国))
    */
    sc.stop()
  }
}
2.leftOuterJoin
/*
* 1.定义
* def leftOuterJoin[W](other: RDD[(K, W)]): RDD[(K, (V, Option[W]))]
* def leftOuterJoin[W](other: RDD[(K, W)],numPartitions: Int): RDD[(K, (V, Option[W]))]
* 2.功能
* 将两个 类型为(K,V)和(K,W)的RDD 进行leftOuterJoin,保留左侧RDD的所有key,返回 (K,(V,Option[W]))的 RDD;右侧缺失的key对应 None
* */
/**
 * Demo of `RDD.leftOuterJoin` — keeps every key of the LEFT RDD.
 * Right-side values are wrapped in Option: `None` when the key (here 7) is absent on the right.
 */
object leftOuterJoinTest {
  def main(args: Array[String]): Unit = {
    // appName fixed to match this demo (was "distinctTest", a copy-paste leftover).
    val sparkConf: SparkConf = new SparkConf().setMaster("local").setAppName("leftOuterJoinTest")
    val sc: SparkContext = new SparkContext(sparkConf)
    val rdd1: RDD[(Int, String)] = sc.makeRDD(List((1, "刘备"), (2, "张飞"), (3, "关羽"), (4, "曹操"), (5, "赵云"), (7, "孙权")), 2)
    val rdd2: RDD[(Int, String)] = sc.makeRDD(List((1, "蜀国"), (2, "蜀国"), (3, "蜀国"), (4, "魏国"), (5, "蜀国"), (6, "吴国")), 3)
    // Left outer join: result element type is (K, (V, Option[W])).
    val joinRdd: RDD[(Int, (String, Option[String]))] = rdd1.leftOuterJoin(rdd2)
    joinRdd.collect().foreach(println)
    /* Sample output (ordering across partitions is not guaranteed):
       (3,(关羽,Some(蜀国)))
       (4,(曹操,Some(魏国)))
       (1,(刘备,Some(蜀国)))
       (7,(孙权,None))
       (5,(赵云,Some(蜀国)))
       (2,(张飞,Some(蜀国)))
    */
    sc.stop()
  }
}
3.rightOuterJoin
/*
* 1.定义
* def rightOuterJoin[W](other: RDD[(K, W)]): RDD[(K, (Option[V], W))]
* def rightOuterJoin[W](other: RDD[(K, W)],numPartitions: Int): RDD[(K, (Option[V], W))]
* 2.功能
* 将两个 类型为(K,V)和(K,W)的RDD 进行rightOuterJoin,保留右侧RDD的所有key,返回 (K,(Option[V],W))的 RDD;左侧缺失的key对应 None
* */
/**
 * Demo of `RDD.rightOuterJoin` — keeps every key of the RIGHT RDD.
 * Left-side values are wrapped in Option: `None` when the key (here 6) is absent on the left.
 */
object rightOuterJoinTest {
  def main(args: Array[String]): Unit = {
    // appName fixed to match this demo (was "distinctTest", a copy-paste leftover).
    val sparkConf: SparkConf = new SparkConf().setMaster("local").setAppName("rightOuterJoinTest")
    val sc: SparkContext = new SparkContext(sparkConf)
    val rdd1: RDD[(Int, String)] = sc.makeRDD(List((1, "刘备"), (2, "张飞"), (3, "关羽"), (4, "曹操"), (5, "赵云"), (7, "孙权")), 2)
    val rdd2: RDD[(Int, String)] = sc.makeRDD(List((1, "蜀国"), (2, "蜀国"), (3, "蜀国"), (4, "魏国"), (5, "蜀国"), (6, "吴国")), 3)
    // Right outer join: result element type is (K, (Option[V], W)).
    val joinRdd: RDD[(Int, (Option[String], String))] = rdd1.rightOuterJoin(rdd2)
    joinRdd.collect().foreach(println)
    /* Sample output (ordering across partitions is not guaranteed):
       (6,(None,吴国))
       (3,(Some(关羽),蜀国))
       (4,(Some(曹操),魏国))
       (1,(Some(刘备),蜀国))
       (5,(Some(赵云),蜀国))
       (2,(Some(张飞),蜀国))
    */
    sc.stop()
  }
}
4.fullOuterJoin
/*
* 1.定义
* def fullOuterJoin[W](other: RDD[(K, W)]): RDD[(K, (Option[V], Option[W]))]
* def fullOuterJoin[W](other: RDD[(K, W)],numPartitions: Int): RDD[(K, (Option[V], Option[W]))]
* 2.功能
* 将两个 类型为(K,V)和(K,W)的RDD 进行fullOuterJoin,保留两侧RDD的所有key,返回 (K,(Option[V],Option[W]))的 RDD;任一侧缺失的key对应 None
* */
/**
 * Demo of `RDD.fullOuterJoin` — keeps every key of BOTH RDDs.
 * Both sides are wrapped in Option: a missing side (key 6 on the left, key 7 on the right) is `None`.
 */
object fullOuterJoinTest {
  def main(args: Array[String]): Unit = {
    // appName fixed to match this demo (was "distinctTest", a copy-paste leftover).
    val sparkConf: SparkConf = new SparkConf().setMaster("local").setAppName("fullOuterJoinTest")
    val sc: SparkContext = new SparkContext(sparkConf)
    val rdd1: RDD[(Int, String)] = sc.makeRDD(List((1, "刘备"), (2, "张飞"), (3, "关羽"), (4, "曹操"), (5, "赵云"), (7, "孙权")), 2)
    val rdd2: RDD[(Int, String)] = sc.makeRDD(List((1, "蜀国"), (2, "蜀国"), (3, "蜀国"), (4, "魏国"), (5, "蜀国"), (6, "吴国")), 3)
    // Full outer join: result element type is (K, (Option[V], Option[W])).
    // Explicit type annotation added (the original left it inferred).
    val joinRdd: RDD[(Int, (Option[String], Option[String]))] = rdd1.fullOuterJoin(rdd2)
    joinRdd.collect().foreach(println)
    /* Sample output (ordering across partitions is not guaranteed):
       (6,(None,Some(吴国)))
       (3,(Some(关羽),Some(蜀国)))
       (4,(Some(曹操),Some(魏国)))
       (1,(Some(刘备),Some(蜀国)))
       (7,(Some(孙权),None))
       (5,(Some(赵云),Some(蜀国)))
       (2,(Some(张飞),Some(蜀国)))
    */
    sc.stop()
  }
}