51_集合_MapReduceComplex(预聚合)

import scala.collection.mutable

/**
 * Word count over pre-aggregated input, written as explicit map/reduce steps.
 *
 * Every input element is a `(sentence, count)` tuple. The count is treated as a
 * weight that is added to each word of the sentence, and the weights are then
 * summed per word across all sentences.
 *
 * Pipeline: split -> flatten -> group by word -> sum per word -> sort by total.
 * Each intermediate result is printed so the steps can be followed on the console.
 */
object MapReduceComplex extends App {
  // 1. Prepare the data: (sentence, weight of that sentence).
  val tp1 = ("Hello Scala Spark World ", 4)
  val tp2 = ("Hello Scala Spark", 3)
  val tp3 = ("Hello Scala", 2)
  val tp4 = ("Hello", 1)
  val tupleList = List(tp1, tp2, tp3, tp4)
  // Build one string explicitly: println(a, b) would be auto-tupled and print "(a,b)".
  println(s"tupleList : $tupleList")

  /**
   * 2. Split: turns one `(sentence, count)` tuple into per-word weights.
   *
   * The sentence is split on runs of whitespace and empty tokens are dropped,
   * so leading, trailing or repeated blanks never produce an empty-string key.
   * A word that occurs several times in the same sentence accumulates `count`
   * once per occurrence instead of being overwritten.
   *
   * @param tp the `(sentence, count)` tuple to split
   * @return a map from each distinct word of the sentence to its accumulated weight
   */
  def splitRule(tp: (String, Int)): mutable.Map[String, Int] = {
    val (sentence, count) = tp
    val weights = mutable.Map.empty[String, Int]
    for (word <- sentence.split("\\s+") if word.nonEmpty) {
      // Accumulate rather than overwrite so duplicates inside one sentence are counted.
      weights.update(word, weights.getOrElse(word, 0) + count)
    }
    weights
  }

  val list1 = tupleList.map(splitRule)
  println(s"list1 : $list1")

  // 3. Flatten: merge the per-sentence maps into one list of (word, weight) pairs.
  val list2 = list1.flatten
  println(s"list2 : $list2")

  // 4. Group: collect all pairs that share the same word.
  val list3 = list2.groupBy(pair => pair._1)
  println(s"list3 : $list3")

  /**
   * 5. Count: sums the weights of all pairs grouped under one word.
   *
   * `Map#map` hands each entry to the function as a single 2-tuple, which is
   * why the parameter is a tuple rather than two separate arguments.
   *
   * @param tp a `(word, pairs)` entry where `pairs` holds the `(word, weight)` tuples of that word
   * @return `(word, total)` where `total` is the sum of all weights (0 for an empty list)
   */
  def sumRule(tp: (String, List[(String, Int)])): (String, Int) = {
    val (word, pairs) = tp
    (word, pairs.map(_._2).sum)
  }

  val list4 = list3.map(sumRule _)
  println(list4)

  // 6. Sort: order the (word, total) pairs by total, ascending.
  val list5 = list4.toList.sortBy(pair => pair._2)
  println(list5)
}

 

posted @ 2021-11-04 20:43  学而不思则罔!  阅读(53)  评论(0)    收藏  举报