partitionBy

import org.apache.spark.rdd.RDD
import org.apache.spark.{SparkConf, SparkContext}

object kvRDD {
  def main(args: Array[String]): Unit = {

    val conf: SparkConf = new SparkConf().setAppName("My scala word count").setMaster("local")
    val sc = new SparkContext(conf)

    val rdd: RDD[(Int, String)] = sc.makeRDD(List((1,"a"),(2,"b"),(3,"c"),(2,"e")),3)


    // partitionBy: repartitions a pair RDD with the given partitioner.
    // If the RDD's existing partitioner equals the new one, the RDD is returned
    // unchanged; otherwise a ShuffledRDD is produced.
    val rdd2: RDD[(Int, String)] = rdd.partitionBy(new org.apache.spark.HashPartitioner(2))
    //rdd2.saveAsTextFile("output")

    // Expected contents with HashPartitioner(2):
    // partition 1 (1 % 2 == 1, 3 % 2 == 1):
    // (1,a)
    // (3,c)

    // partition 0 (2 % 2 == 0):
    // (2,b)
    // (2,e)

    sc.stop()
  }

}
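To see both behaviors of partitionBy directly, here is a minimal sketch (not from the original post; the object name PartitionByCheck is made up, and it assumes the same local setup as above). mapPartitionsWithIndex shows which partition each pair landed in, and because HashPartitioner compares equal when numPartitions matches, re-applying the same partitioner returns the original RDD rather than a new ShuffledRDD.

import org.apache.spark.HashPartitioner
import org.apache.spark.rdd.RDD
import org.apache.spark.{SparkConf, SparkContext}

object PartitionByCheck {
  def main(args: Array[String]): Unit = {
    val sc = new SparkContext(new SparkConf().setAppName("partitionBy check").setMaster("local"))

    val rdd: RDD[(Int, String)] = sc.makeRDD(List((1, "a"), (2, "b"), (3, "c"), (2, "e")), 3)
    val rdd2 = rdd.partitionBy(new HashPartitioner(2))

    // Print the partition index next to each pair.
    rdd2.mapPartitionsWithIndex((idx, it) => it.map(kv => s"partition $idx: $kv"))
      .collect()
      .foreach(println)
    // e.g. partition 0: (2,b), (2,e) and partition 1: (1,a), (3,c)
    // (order within a partition is not guaranteed)

    // HashPartitioner.equals compares numPartitions, so partitionBy
    // short-circuits and returns the same RDD, with no extra shuffle.
    val rdd3 = rdd2.partitionBy(new HashPartitioner(2))
    println(rdd3 eq rdd2) // true

    sc.stop()
  }
}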

