import org.apache.spark.SparkConf
import org.apache.spark.SparkContext
import org.apache.spark.Partitioner
import org.apache.spark.HashPartitioner
object Demo {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setMaster("local[*]").setAppName("app")
    val sc = new SparkContext(conf)
    val data = sc.textFile("F:\\test\\test\\ssort.txt")
    // Partition first, then sort within each partition
    data.map { x =>
      val arr = x.split(" ")
      (arr(0), arr(1).toInt)
    }.partitionBy(new MySparkPartition(2)).mapPartitions { iter =>
      // Note: this sortBy is the Scala List method, not Spark's RDD sortBy; be careful to distinguish the two!
      iter.toList.sortBy { case (x, y) =>
        (x, -y) // key ascending, value descending
      }.toIterator
    }.saveAsTextFile("F:\\test\\test\\output")
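    /* A possible alternative (a sketch added for illustration, not part of the original code):
     * repartitionAndSortWithinPartitions sorts by key during the shuffle, so the partition
     * never has to be buffered into a List. It needs a pair RDD whose key carries both sort
     * fields, an implicit Ordering for that key, and a partitioner that looks only at the
     * first field; the output path below is hypothetical.
     *
     * implicit val ord: Ordering[(String, Int)] =
     *   Ordering.Tuple2(Ordering.String, Ordering.Int.reverse)
     * data.map { x =>
     *   val arr = x.split(" ")
     *   ((arr(0), arr(1).toInt), null)
     * }.repartitionAndSortWithinPartitions(new Partitioner {
     *   override def numPartitions: Int = 2
     *   override def getPartition(key: Any): Int =
     *     if (key.asInstanceOf[(String, Int)]._1 == "aa") 1 else 0
     * }).map(_._1).saveAsTextFile("F:\\test\\test\\output2")
     */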
    //data.flatMap(_.split(" ")).map((_, 1)).reduceByKey(_ + _).top(3)(Ordering.by(_._2)).foreach(println)
    /*data.map{x =>
      (new SecondarySortKey(x.split(" ")(0), x.split(" ")(1).toInt))
    }.sortBy(x => x, true).map{x => (x.first, x.second)}.foreach(println)*/
    /*data.sortBy({x =>
      (new SecondarySortKey(x.split(" ")(0), x.split(" ")(1).toInt))
    }, true).foreach(println)*/
    /*val data1 = data.sortBy({x =>
      (new SecondarySortKey(x.split(" ")(0), x.split(" ")(1).toInt))
    }, true).map{x =>
      val arr = x.split(" ")
      (arr(0), arr(1))
    }.partitionBy(new MySparkPartition(2)).saveAsTextFile("F:\\test\\test\\output")*/
    /*val l1 = List[(String, Int)](("a", 1), ("b", 2), ("d", 4), ("c", 3), ("a", 2))
    //l1.sortBy(x => (x._1, x._2))(Ordering.Tuple2(Ordering.String, Ordering.Int.reverse))
    l1.sortBy{case (x, y) =>
      (x, -y)
    }.foreach(println)*/
  }
}
class MySparkPartition(numsPartitions: Int) extends Partitioner {
  override def numPartitions: Int = numsPartitions
  override def getPartition(key: Any): Int = {
    // Route key "aa" to partition 1 and everything else to partition 0
    if (key == "aa") 1 else 0
  }
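  // Added for illustration (not part of the original class): Spark compares
  // partitioners with equals() to decide whether an RDD is already partitioned
  // the way a downstream operation expects, so custom partitioners should
  // override equals and hashCode.
  override def equals(other: Any): Boolean = other match {
    case p: MySparkPartition => p.numPartitions == numPartitions
    case _ => false
  }
  override def hashCode: Int = numPartitions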
}
class SecondarySortKey(val first: String, val second: Int) extends Ordered[SecondarySortKey] with Serializable {
  // Order by first ascending; when first is equal, order by second descending
  override def compare(other: SecondarySortKey): Int = {
    val comp = this.first.compareTo(other.first)
    if (comp == 0) {
      other.second.compareTo(this.second)
    } else {
      comp
    }
  }
}
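
// A minimal local check of SecondarySortKey's ordering (an addition for
// illustration, not part of the original code): first ascending, second descending.
object SecondarySortKeyDemo {
  def main(args: Array[String]): Unit = {
    val keys = List(
      new SecondarySortKey("aa", 1),
      new SecondarySortKey("bb", 2),
      new SecondarySortKey("aa", 3)
    )
    // sorted picks up the compare method from the Ordered trait above
    keys.sorted.map(k => (k.first, k.second)).foreach(println)
    // expected output: (aa,3) (aa,1) (bb,2)
  }
}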