Action operators
import org.apache.spark.rdd.RDD
import org.apache.spark.{SparkConf, SparkContext}

object reduceRDD {
  def main(args: Array[String]): Unit = {
    val conf: SparkConf = new SparkConf().setAppName("My scala word count").setMaster("local")
    val sc = new SparkContext(conf)

    // 1. reduce: aggregates all elements of the RDD -- first within each partition, then across partitions
    val rdd: RDD[Int] = sc.makeRDD(1 to 10, 3)
    val reduce: Int = rdd.reduce((x, y) => x + y)
    println("reduce", reduce) // 55

    val rdd2: RDD[(Int, String)] = sc.makeRDD(List((1, "a"), (2, "b"), (3, "c"), (2, "e")), 3)
    val reduce2: (Int, String) = rdd2.reduce((x, y) => (x._1 + y._1, x._2 + y._2))
    println("reduce2", reduce2) // (reduce2,(8,bcea)) -- the string order depends on how partition results are combined

    // 2. aggregate: the zero value is applied once inside each partition and once more when combining partitions
    val aggregate: Int = rdd.aggregate(10)(_ + _, _ + _)
    println("aggregate", aggregate) // (aggregate,95)

    // 3. fold: like aggregate, but uses the same function within and across partitions
    val fold: Int = rdd.fold(10)(_ + _)
    println("fold", fold) // (fold,95)

    // 4. count: number of elements in the RDD
    val count: Long = rdd.count()
    println("count", count) // 10

    // 5. countByKey: number of elements per key of a pair RDD
    val countByKey: collection.Map[Int, Long] = rdd2.countByKey()
    println("countByKey", countByKey) // (countByKey,Map(3 -> 1, 1 -> 1, 2 -> 2))

    // 6. foreach: runs the function on every element (executed on the executors)
    rdd.foreach(println)

    // 7. first: the first element
    val first: Int = rdd.first()
    println("first", first) // 1

    // 8. take: the first n elements
    val take: Array[Int] = rdd.take(3)
    println("take", take.mkString(",")) // 1,2,3

    // 9. takeOrdered: the first n elements after sorting
    val takeOrdered: Array[Int] = rdd.takeOrdered(3)
    println("takeOrdered", takeOrdered.mkString(",")) // 1,2,3
  }
}
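As a quick sanity check on the aggregate and fold results above, here is a minimal sketch (plain Scala, no Spark needed) of why the zero value 10 turns 55 into 95: it is added once inside each of the 3 partitions and once more when the partition results are combined. The partition split shown is an assumption about how Spark slices the range 1 to 10 into 3 partitions; any split into 3 partitions gives the same total.

object AggregateArithmeticSketch {
  def main(args: Array[String]): Unit = {
    // assumed split of 1..10 into 3 partitions (any split yields the same total here)
    val partitions = Seq(1 to 3, 4 to 6, 7 to 10)
    val zero = 10

    // within each partition: zero + sum of its elements -> Seq(16, 25, 44)
    val perPartition = partitions.map(p => p.foldLeft(zero)(_ + _))

    // across partitions: zero + sum of the partition results -> 10 + 16 + 25 + 44 = 95
    val total = perPartition.foldLeft(zero)(_ + _)

    println(total) // 95, matching rdd.aggregate(10)(_ + _, _ + _) and rdd.fold(10)(_ + _)
  }
}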