1.定义
/*
* def repartition(numPartitions: Int)(implicit ord: Ordering[T] = null): RDD[T] = withScope {
* coalesce(numPartitions, shuffle = true)
* }
*
* 1.功能
* 既能扩大分区也能缩小分区,并且都会走shuffle
* 2.repartition 和 coalesce 的区别
* repartition是对 coalesce(shuffle = true)的封装
* */
2.示例
/**
 * Runnable demo of changing an RDD's partition count with a shuffle.
 *
 * Two cases are shown on the same 4-partition source RDD:
 *  - growing to 5 partitions with `repartition(5)`;
 *  - shrinking to 2 partitions with `coalesce(2, shuffle = true)`, which is the
 *    call `repartition` itself delegates to.
 *
 * An explicit `main` is used instead of extending `scala.App`: the Spark
 * documentation warns that subclasses of `scala.App` may not work correctly.
 */
object RddTransitionOperator_repartition {

  /** Prints the partition count and the concrete runtime class of `rdd`. */
  private def printSummary(rdd: RDD[_]): Unit = {
    println(s"分区数:${rdd.getNumPartitions}")
    println(s"rdd类型:${rdd.getClass.getName}")
  }

  /**
   * Entry point: builds the source RDD, runs both demos and always stops the context.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    val sc: SparkContext = CommonUtils.getSparkContext("groupBy repartition")
    try {
      // Source data: the range [1, 10) with step 1, spread over 4 partitions.
      val rdd: RDD[Long] = sc.range(start = 1, end = 10, step = 1, numSlices = 4)

      // Grow the partition count: 4 -> 5.
      val expanded: RDD[Long] = rdd.repartition(5)
      printSummary(expanded)
      // Expected output:
      //   partition count: 5
      //   rdd class:       org.apache.spark.rdd.MapPartitionsRDD
      expanded.collect() // action; result is discarded, it only triggers the job

      // Shrink the partition count: 4 -> 2 (same call repartition(2) would make).
      val shrunk: RDD[Long] = rdd.coalesce(2, shuffle = true)
      printSummary(shrunk)
      // Expected output:
      //   partition count: 2
      //   rdd class:       org.apache.spark.rdd.MapPartitionsRDD
      shrunk.collect() // action; result is discarded, it only triggers the job
    } finally {
      // Release the context even when one of the jobs above fails.
      sc.stop()
    }
    // Debugging aid kept disabled, as in the original:
    // while (true) {}
  }
}