// spark_core01

package com.atguigu.bigata.spark.core.rdd.builder.operator.transform



import org.apache.spark.{SparkConf, SparkContext}

/**
 * Demo of Spark key-value aggregation operators on a small in-memory pair RDD:
 * `combineByKey`, `aggregateByKey`, `foldByKey` and `reduceByKey`.
 *
 * The first two pipelines compute the per-key average; the remaining four compute
 * the per-key sum (the "word count" pattern) with each operator in turn.
 *
 * @author ${user}
 * @date 2022/2/14 23:36
 */
object spark017_RDD_Operator_transform_AggregateByKey {

  /**
   * Runs every demo pipeline against a local SparkContext and prints the results.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setMaster("local[*]").setAppName("Operator")
    val sc = new SparkContext(conf)

    // Stop the context even when a job throws, so the local Spark runtime is always released.
    try {
      // Sample data spread over 2 partitions.
      val rdd = sc.parallelize(List(("a", 2), ("a", 1), ("b", 3), ("b", 4), ("b", 5), ("b", 6)), 2)

      /*
       * combineByKey takes three functions:
       *   1. createCombiner - converts the first value seen for a key into the accumulator shape
       *   2. mergeValue     - the intra-partition rule (accumulator + next value)
       *   3. mergeCombiners - the inter-partition rule (accumulator + accumulator)
       * The accumulator here is (sum, count).
       */
      rdd.combineByKey(
        (value: Int) => (value, 1),
        (acc: (Int, Int), value: Int) => (acc._1 + value, acc._2 + 1),
        (left: (Int, Int), right: (Int, Int)) => (left._1 + right._1, left._2 + right._2)
      ).mapValues {
        // Divide as Double: Int / Int would silently truncate the average (e.g. 3 / 2 == 1).
        case (sum, count) => sum.toDouble / count
      }.collect().foreach(println)

      /*
       * 1. When the intra-partition and inter-partition rules differ, use aggregateByKey.
       *    It is curried: the first parameter list is the zero (initial) value, the second
       *    parameter list is (intra-partition rule, inter-partition rule), e.g.
       *      rdd.aggregateByKey(0)(Math.max, _ + _).collect.foreach(println)
       * 2. When both rules are the same, foldByKey is enough, e.g.
       *      val rddFoldByKey = rdd.foldByKey(0)(_ + _)
       */
      // Per-key average again, this time starting from the zero accumulator (sum = 0, count = 0).
      rdd.aggregateByKey((0, 0))(
        (acc, value) => (acc._1 + value, acc._2 + 1),
        (left, right) => (left._1 + right._1, left._2 + right._2)
      ).mapValues {
        case (sum, count) => sum.toDouble / count
      }.collect().foreach(println)

      println("=====================")

      // WordCount01: reduceByKey
      rdd.reduceByKey(_ + _).collect().foreach(println)
      println("=======================")

      // WordCount02: foldByKey
      // Collect first so the driver prints the results, consistent with the other pipelines.
      rdd.foldByKey(0)(_ + _).collect().foreach(println)
      println("=======================")

      // WordCount03: aggregateByKey
      rdd.aggregateByKey(0)(_ + _, _ + _).collect().foreach(println)
      println("=========================")

      // WordCount04: combineByKey
      rdd.combineByKey(
        (value: Int) => value,
        (acc: Int, value: Int) => acc + value,
        (left: Int, right: Int) => left + right
      ).collect().foreach(println)
    } finally {
      sc.stop()
    }
  }
}

    
// posted @ 2022-02-15 20:30  ftwftw  Views(76)  Comments(0)  Favorite  Report