package com.bjsxt.scala.spark.operator
import org.apache.spark.{Partitioner, SparkConf, SparkContext}
import org.apache.spark.rdd.RDD.rddToPairRDDFunctions
object PartitionByOperator {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("PartitionByOperator")
      .setMaster("local")
    val sc = new SparkContext(conf)
    val nameList = Array(
      (1, "xuruyun"),
      (2, "liangyongqi"),
      (3, "wangfei"),
      (4, "yasaka"),
      (5, "Angelababy"),
      (6, "Angelababy")
    )
    val name = sc.parallelize(nameList, 1)
    // partitionBy shuffles the pair RDD so that each key lands in the
    // partition chosen by the custom partitioner
    val partitionerRDD = name.partitionBy(new UDPartitioner())
    partitionerRDD.mapPartitionsWithIndex((index, iterator) => {
      println("partitionId:" + index)
      // Materialize the partition before printing: returning an iterator
      // that has already been drained by a while loop would leave the
      // resulting RDD empty, so count() would report 0
      val items = iterator.toList
      items.foreach(value => println("value:" + value))
      items.iterator
    }).count()
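
    // Added sketch (not in the original): glom() turns each partition into an
    // array, so the same key placement can also be verified from the driver
    // without walking iterators by hand.
    partitionerRDD.glom().collect().zipWithIndex.foreach {
      case (part, idx) => println("partition " + idx + " -> " + part.mkString(", "))
    }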
    sc.stop()
  }

  class UDPartitioner extends Partitioner {
    /**
     * Number of partitions in the resulting RDD
     * @return the partition count
     */
    override def numPartitions: Int = 3

    /**
     * Implements the partitioning strategy: keys 1-2 map to partition 0,
     * keys 3-4 to partition 1, and everything else to partition 2
     * @param key a key from the RDD
     * @return the index of the target partition
     */
    override def getPartition(key: Any): Int = {
      val intKey = key.toString.toInt
      if (intKey >= 1 && intKey < 3) {
        0
      } else if (intKey >= 3 && intKey < 5) {
        1
      } else {
        2
      }
    }
  }
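
  // Added sketch, not part of the original example: Spark compares
  // partitioners with equals() to decide whether two RDDs are co-partitioned
  // (e.g. to skip a shuffle when joining), so a production-grade partitioner
  // would normally also override equals and hashCode, as HashPartitioner does.
  // The class name below is hypothetical.
  class UDPartitionerWithEquality extends UDPartitioner {
    override def equals(other: Any): Boolean = other match {
      case p: UDPartitionerWithEquality => p.numPartitions == numPartitions
      case _ => false
    }
    override def hashCode: Int = numPartitions
  }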
}