package com.bjsxt.scala.spark.operator
import org.apache.spark.SparkConf
import org.apache.spark.SparkContext
import scala.collection.mutable.ListBuffer
object CartesianOperator {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("CartesianOperator").setMaster("local")
    val sc = new SparkContext(conf)
    // cartesian: every pairing of an element from rdd1 with an element from rdd2
    val rdd1 = sc.parallelize(List((1, 3), (1, 2), (1, 2), (1, 9)))
    val rdd2 = sc.parallelize(List((2, 3), (2, 2), (2, 4), (2, 9)))
    val cartesianRDD = rdd1.cartesian(rdd2)
    println("cartesianRDD.count(): " + cartesianRDD.count()) // 4 * 4 = 16
    cartesianRDD.foreach(println)

    // takeSample(withReplacement = true, num = 100, seed = 1): with replacement
    // the sample can be larger than the RDD itself
    val rdd3 = sc.parallelize(List(1, 2, 3, 4, 5, 6, 7, 8, 9))
    val sampleArr = rdd3.takeSample(true, 100, 1)
    for (elem <- sampleArr) {
      println(elem)
    }
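    // Without replacement the returned sample is capped at the RDD's size;
    // a sketch reusing the same rdd3 (seed chosen arbitrarily)
    for (elem <- rdd3.takeSample(false, 5, 1)) {
      println(elem)
    }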
println("==========================================================")
val orderRDD = rdd3.top(3)
for(elem <- orderRDD){
println(elem)
}
for(elem <- rdd3.takeOrdered(3)(new MyOrder)){
println(elem)
}
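    // Equivalent call using the built-in reversed Int ordering instead of MyOrder
    for (elem <- rdd3.takeOrdered(3)(Ordering[Int].reverse)) {
      println(elem)
    }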
    // Per-key value counts via groupByKey + flatMap: for every key, count how
    // often each value appears and emit (key, "value~count")
    // val rdd5 = sc.makeRDD(Array((1, 2), (1, 2), (1, 2), (2, 2), (2, 3)))
    // rdd5.groupByKey().flatMap(x => {
    //   val rest = new ListBuffer[(Int, String)]()
    //   val key = x._1
    //   val values = x._2
    //   val map = new scala.collection.mutable.HashMap[Int, Int]()
    //   for (value <- values) {
    //     map.update(value, map.getOrElse(value, 0) + 1)
    //   }
    //   map.foreach(entry => {
    //     rest.append((key, s"${entry._1}~${entry._2}"))
    //   })
    //   rest
    // }).foreach(println)
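    // A sketch of the same per-key value count using reduceByKey instead of
    // groupByKey; reduceByKey combines counts map-side and avoids collecting
    // all values for a key into one in-memory collection
    // sc.makeRDD(Array((1, 2), (1, 2), (1, 2), (2, 2), (2, 3)))
    //   .map { case (key, value) => ((key, value), 1) }
    //   .reduceByKey(_ + _)
    //   .map { case ((key, value), count) => (key, s"$value~$count") }
    //   .foreach(println)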
    sc.stop()
  }
}
// Ordering that sorts Ints in descending order (used by takeOrdered above)
class MyOrder extends Ordering[Int] {
  override def compare(x: Int, y: Int): Int = {
    // reverse of the natural order: larger values sort first, equal values compare as 0
    y.compareTo(x)
  }
}