partitionBy
import org.apache.spark.rdd.RDD
import org.apache.spark.{HashPartitioner, SparkConf, SparkContext}

/**
 * Demonstrates `partitionBy` on a pair RDD.
 *
 * Builds a small `RDD[(Int, String)]` with 3 partitions and repartitions it
 * into 2 partitions using a [[HashPartitioner]].
 */
object kvRDD {

  def main(args: Array[String]): Unit = {
    // Local single-threaded master for the demo.
    val conf: SparkConf = new SparkConf().setAppName("My scala word count").setMaster("local")
    val sc = new SparkContext(conf)

    // Pair RDD with 4 elements spread over 3 initial partitions.
    val rdd: RDD[(Int, String)] = sc.makeRDD(List((1, "a"), (2, "b"), (3, "c"), (2, "e")), 3)

    // partitionBy: repartitions a pair RDD. If the RDD's current partitioner
    // already equals the requested one, this is a no-op; otherwise it produces
    // a ShuffledRDD (i.e. a shuffle is triggered).
    val rdd2: RDD[(Int, String)] = rdd.partitionBy(new HashPartitioner(2))

    //rdd2.saveAsTextFile("output")
    // Expected contents with HashPartitioner(2) (key.hashCode % 2):
    //   partition 1: (1,a), (3,c)
    //   partition 0: (2,b), (2,e)

    // Fix: stop the SparkContext so the application releases its resources
    // and the local JVM can shut down cleanly (was missing in the original).
    sc.stop()
  }
}
posted on 2020-09-21 18:58 happygril3 阅读(320) 评论(0) 收藏 举报
浙公网安备 33010602011771号