CartesianOperator

package com.bjsxt.scala.spark.operator

import org.apache.spark.SparkConf
import org.apache.spark.SparkContext
import scala.collection.mutable.ListBuffer

object CartesianOperator {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("AggregateOperator").setMaster("local")
    val sc = new SparkContext(conf)
    val rdd1 = sc.parallelize(List((1, 3), (1, 2), (1, 2), (1, 9)))
    val rdd2 = sc.parallelize(List((2, 3), (2, 2), (2, 4), (2, 9)))
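    // cartesian (a transformation) pairs every element of rdd1 with every
    // element of rdd2, so the result holds 4 * 4 = 16 pairs.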
    val cartesianRDD = rdd1.cartesian(rdd2)
    println("cartesianRDD.count():" + cartesianRDD.count())
    val rdd3 = sc.parallelize(List(1,2,3,4,5,6,7,8,9))
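    // takeSample(withReplacement = true, num = 100, seed = 1) is an action that
    // returns a local Array; with replacement, num may exceed the RDD's size.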
    val sampleArr = rdd3.takeSample(true, 100, 1)
    cartesianRDD.foreach(println)
    for(elem <- sampleArr){
      println(elem)
    }
    println("==========================================================")
    val topArr = rdd3.top(3)
    for(elem <- topArr){
      println(elem)
    }
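    // takeOrdered(3) takes an explicit Ordering; with the descending MyOrder
    // defined below, it likewise returns the 3 largest elements.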
    for(elem <- rdd3.takeOrdered(3)(new MyOrder)){
      println(elem)
    }
//    // For each key, count how many times each distinct value occurs and
//    // emit (key, "value~count") pairs.
//    val rdd5 = sc.makeRDD(Array((1,2),(1,2),(1,2),(2,2),(2,3)))
//    rdd5.groupByKey().flatMap(x=>{
//      val rest = new ListBuffer[(Int,String)]()
//      val key = x._1
//      val values = x._2
//      val map = new scala.collection.mutable.HashMap[Int,Int]()
//      for(value <- values){
//        map(value) = map.getOrElse(value, 0) + 1
//      }
//      map.foreach(x=>{
//        rest.append((key,x._1+"~"+x._2))
//      })
//      rest
//    }).foreach(println)
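    // The same per-key counting as the commented block above, sketched more
    // compactly: count each (key, value) pair once with reduceByKey, then
    // reshape every ((key, value), count) record into (key, "value~count").
    val rdd5 = sc.makeRDD(Array((1,2),(1,2),(1,2),(2,2),(2,3)))
    rdd5.map(kv => (kv, 1))
      .reduceByKey(_ + _)
      .map { case ((key, value), count) => (key, value + "~" + count) }
      .foreach(println)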
    sc.stop()
  }
}
class MyOrder extends Ordering[Int]{
  // Descending order: larger values compare as smaller, so they come first.
  def compare(x: Int, y: Int): Int = {
    if (x > y){
      -1
    }else if (x < y){
      1
    }else{
      0
    }
  }
}
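For a simple descending order like this one, the built-in Ordering.Int.reverse is an equivalent drop-in for MyOrder:

rdd3.takeOrdered(3)(Ordering.Int.reverse)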

  
