// TakeOrderOperator

package com.bjsxt.scala.spark.operator

import org.apache.spark.SparkConf
import org.apache.spark.SparkContext
import scala.math.Ordering

/**
 * Demonstrates several RDD operators that use curried / multi-parameter-list
 * signatures or randomness: takeOrdered, top, randomSplit and cartesian.
 */
object TakeOrderOperator {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf()
    conf.setMaster("local")
    conf.setAppName("TakeOrderOperator")
    val sc = new SparkContext(conf)
    val rdd = sc.parallelize(List(2, 4, 9, 1, 4, 6, 2))
    /**
     * Curried function (a simplified form of higher-order function):
     *   def fun1(a: Int)(b: Int) = a + b
     *   fun1(1)(2)
     * takeOrdered/top take the Ordering in a second parameter list,
     * which is why they are called as op(n)(ordering).
     */
    val takeOrderArr = rdd.takeOrdered(3)(new MyOdering)
    rdd.top(3)(new MyOdering).foreach(println)
    /**
     * seed: the initial value of the operator's internal random algorithm
     * (defaults to a timestamp). For reproducible machine-learning runs,
     * fix the seed to a constant.
     *
     * randomSplit returns Array[RDD[Int]].
     */
    // foreach, not map: we only want the side effect of printing each
    // split's count; map would build and discard a result array.
    rdd.randomSplit(Array(0.8, 0.2), 1L).foreach { splitRdd => println(splitRdd.count) }
    /**
     * cartesian: cross product of two RDDs (here 10 x 10 = 100 pairs).
     */
    val rdd1 = sc.makeRDD(1 to 10, 2)
    val rdd2 = sc.makeRDD(11 to 20, 2)
    val cartesianRDD = rdd1.cartesian(rdd2)
    println(cartesianRDD.count())
    // Print the takeOrdered result (it was computed but its printing
    // had been commented out, leaving the val unused).
    takeOrderArr.foreach(println)
    sc.stop()
  }
}
/**
 * Descending Ordering[Int] used by takeOrdered/top above.
 *
 * Bug fix: the original returned 1 when x == y, so compare(x, x) != 0,
 * violating the Ordering contract (compare must be reflexive: equal
 * inputs yield 0) and breaking antisymmetry. Delegating to
 * y.compareTo(x) gives a correct total descending order.
 */
class MyOdering extends Ordering[Int] {
  def compare(x: Int, y: Int): Int = y.compareTo(x)
}

  

// posted @ 2018-06-18 15:13  uuhh