PartitionByOperator
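A small Spark example of redistributing a pair RDD with partitionBy and a custom Partitioner: keys 1 and 2 land in partition 0, keys 3 and 4 in partition 1, and everything else in partition 2.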

package com.bjsxt.scala.spark.operator

import org.apache.spark.SparkConf
import org.apache.spark.SparkContext
import org.apache.spark.rdd.RDD.rddToPairRDDFunctions
import org.apache.spark.Partitioner
object PartitionByOperator {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("PartitionByOperator")
      .setMaster("local")
    val sc = new SparkContext(conf)
    val nameList = Array(
      (1, "xuruyun"),
      (2, "liangyongqi"),
      (3, "wangfei"),
      (4, "yasaka"),
      (5, "Angelababy"),
      (6, "Angelababy")
    )
    val name = sc.parallelize(nameList, 1)
    // Redistribute the pair RDD according to the custom partitioner below.
    val partitionerRDD = name.partitionBy(new UDPartitioner())
    partitionerRDD.mapPartitionsWithIndex((index, iterator) => {
      println("partitionId:" + index)
      // Materialize the partition first: draining the iterator with a
      // while-loop and then returning it would hand back an already
      // exhausted iterator, so count() would see empty partitions.
      val values = iterator.toList
      values.foreach(value => println("value:" + value))
      values.iterator
    }).count()

    sc.stop()
  }
  class UDPartitioner extends Partitioner {
    /**
      * Number of partitions in the resulting RDD.
      * @return
      */
    override def numPartitions: Int = 3

    /**
      * The partitioning strategy.
      * @param key the key of an RDD element
      * @return the target partition id
      */
    override def getPartition(key: Any): Int = {
      val intKey = key.toString.toInt
      if (intKey >= 1 && intKey < 3) {
        0
      } else if (intKey >= 3 && intKey < 5) {
        1
      } else {
        2
      }
    }
  }

}
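Run in local mode, the three-way split above should print roughly the following (interleaved with Spark's own log output, which will vary):

partitionId:0
value:(1,xuruyun)
value:(2,liangyongqi)
partitionId:1
value:(3,wangfei)
value:(4,yasaka)
partitionId:2
value:(5,Angelababy)
value:(6,Angelababy)

For a plain hash-based split, Spark's built-in org.apache.spark.HashPartitioner would do; a custom Partitioner is only needed here because the example routes explicit key ranges to specific partitions.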

  
