51_集合_MapReduceComplex(预聚合)
import scala.collection.mutable

/** Word-count demo over pre-aggregated input.
  *
  * Each input element is a `(sentence, count)` pair, meaning the sentence
  * occurred `count` times. The pipeline splits every sentence into words,
  * weights each word by the sentence count, sums the weights per word and
  * prints the result sorted by ascending total.
  */
object MapReduceComplex extends App {

  val tp1 = ("Hello Scala Spark World ", 4)
  val tp2 = ("Hello Scala Spark", 3)
  val tp3 = ("Hello Scala", 2)
  val tp4 = ("Hello", 1)

  // 1. Prepare the data.
  val tupleList = List(tp1, tp2, tp3, tp4)
  // Build a single string: passing two arguments to println auto-tuples them
  // and prints "(tupleList : ,List(...))" instead of the labelled value.
  println(s"tupleList : $tupleList")

  /** Splits one pre-aggregated sentence into per-word counts.
    *
    * @param tp `(sentence, count)`; the sentence is split on whitespace
    * @return a map from each word to its weight within this sentence. A word
    *         occurring k times in the sentence gets `k * count`. Empty tokens
    *         produced by leading or repeated whitespace are ignored.
    */
  // 2. Split.
  def splitRule(tp: (String, Int)): mutable.Map[String, Int] = {
    val (sentence, count) = tp
    val counts = mutable.Map.empty[String, Int]
    for (word <- sentence.split("\\s+") if word.nonEmpty) {
      // Accumulate rather than overwrite so repeated words are not lost.
      counts(word) = counts.getOrElse(word, 0) + count
    }
    counts
  }

  val list1 = tupleList.map(splitRule)
  println(s"list1 : $list1")

  // 3. Flatten the per-sentence maps into one list of (word, count) pairs.
  val list2 = list1.flatten
  println(s"list2 : $list2")

  // 4. Group the pairs by word.
  val list3 = list2.groupBy(_._1)
  println(s"list3 : $list3")

  /** Sums the counts of one group.
    *
    * `map` on a `Map` hands each entry over as a 2-tuple, hence the tuple
    * parameter.
    *
    * @param tp `(word, pairs)` where every pair carries a partial count
    * @return `(word, total)` with `total` being the sum of all partial counts
    */
  // 5. Count.
  def sumRule(tp: (String, List[(String, Int)])): (String, Int) =
    (tp._1, tp._2.map(_._2).sum)

  val list4 = list3.map(sumRule)
  println(list4)

  // 6. Sort by ascending total.
  val list5 = list4.toList.sortBy(_._2)
  println(list5)
}