package com.bjsxt.scala.spark.operator
import org.apache.spark.SparkConf
import org.apache.spark.SparkContext
import scala.math.Ordering
object TakeOrderOperator {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf()
    conf.setMaster("local")
    conf.setAppName("TakeOrderOperator")
    val sc = new SparkContext(conf)
    val rdd = sc.parallelize(List(2, 4, 9, 1, 4, 6, 2))
    /**
     * Curried function (a simplified way to write a higher-order function):
     *
     *   def fun1(v1: Int): Int => Int = {
     *     def fun2(v2: Int): Int = v1 + v2
     *     fun2
     *   }
     *
     * is equivalent to the curried form
     *
     *   def fun1(a: Int)(b: Int): Int = a + b
     *   fun1(1)(2)
     */
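    // Minimal runnable illustration of the curried form described above
    // (added for clarity; `add` is a hypothetical helper, not part of the
    // original example). takeOrdered below is itself curried: the element
    // count goes in the first parameter list, the Ordering in the second.
    def add(a: Int)(b: Int): Int = a + b
    println(add(1)(2))    // 3: both argument lists applied at once
    val addOne = add(1) _ // partial application yields a function Int => Int
    println(addOne(2))    // 3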
    val takeOrderArr = rdd.takeOrdered(3)(new MyOrdering)
    takeOrderArr.foreach(println) // 9, 6, 4: the "first" 3 elements under the descending ordering
    rdd.top(3)(new MyOrdering).foreach(println) // 1, 2, 2: top reverses the ordering it is given
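    // Equivalent sketch without the custom class (illustrative addition, not
    // in the original): Ordering[Int].reverse gives the same descending
    // comparison, and top(3) alone already returns the 3 largest values.
    rdd.takeOrdered(3)(Ordering[Int].reverse).foreach(println) // 9, 6, 4
    rdd.top(3).foreach(println) // 9, 6, 4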
    /**
     * seed:
     * The seed is the initial value of the operator's internal random number
     * generator (when omitted, Spark picks one at random via
     * Utils.random.nextLong). In machine learning work the seed is usually
     * hard-coded so that the split is reproducible.
     *
     * randomSplit returns Array[RDD[Int]].
     */
    rdd.randomSplit(Array(0.8, 0.2), 1L).foreach { split => println(split.count()) }
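    // Illustrative addition: with the seed pinned, repeated splits of the
    // same RDD are identical, which is why ML code fixes the seed.
    val splitA = rdd.randomSplit(Array(0.8, 0.2), seed = 42L)
    val splitB = rdd.randomSplit(Array(0.8, 0.2), seed = 42L)
    println(splitA(0).collect().sameElements(splitB(0).collect())) // true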
    /**
     * cartesian:
     * Pairs every element of rdd1 with every element of rdd2, so the result
     * has rdd1.count * rdd2.count elements (here 10 * 10 = 100).
     */
    val rdd1 = sc.makeRDD(1 to 10, 2)
    val rdd2 = sc.makeRDD(11 to 20, 2)
    val cartesianRDD = rdd1.cartesian(rdd2)
    println(cartesianRDD.count())
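    // Illustrative addition: peek at the first few generated pairs.
    cartesianRDD.take(3).foreach(println) // e.g. (1,11), (1,12), (1,13)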
    sc.stop()
  }
}
// A descending Ordering[Int]: larger values compare as "smaller", so they
// come first. Delegating to compareTo also returns 0 for equal elements;
// the original returned 1 in that case, violating the Ordering contract
// (compare(x, x) must be 0).
class MyOrdering extends Ordering[Int] {
  def compare(x: Int, y: Int): Int = y.compareTo(x)
}