import scala.collection.mutable
/**
 * Word-count demo written as a sequence of map / flatten / groupBy / reduce / sort steps.
 *
 * Each input is a `(sentence, weight)` pair; every word in the sentence contributes
 * `weight` to that word's total. The intermediate result of every step is printed.
 */
object MapReduceComplex extends App {
  val tp1 = ("Hello Scala Spark World ", 4)
  val tp2 = ("Hello Scala Spark", 3)
  val tp3 = ("Hello Scala", 2)
  val tp4 = ("Hello", 1)

  // 1. Prepare the data.
  val tupleList = List(tp1, tp2, tp3, tp4)
  println(s"tupleList : $tupleList")

  /**
   * 2. Split: break a weighted sentence into per-word weights.
   *
   * @param tp a `(sentence, weight)` pair
   * @return a map from each distinct word of the sentence to its accumulated weight;
   *         a word occurring k times in the sentence maps to `k * weight`
   */
  def splitRule(tp: (String, Int)): mutable.Map[String, Int] = {
    val (sentence, weight) = tp
    val counts = mutable.Map.empty[String, Int]
    // Split on runs of whitespace and drop empty tokens so that leading or
    // repeated spaces (or an empty sentence) do not produce a bogus "" word.
    for (word <- sentence.split("\\s+") if word.nonEmpty) {
      // Accumulate rather than overwrite, so repeated words are counted each time.
      counts.update(word, counts.getOrElse(word, 0) + weight)
    }
    counts
  }

  val list1 = tupleList.map(splitRule)
  println(s"list1 : $list1")

  // 3. Flatten the per-sentence maps into one list of (word, weight) pairs.
  val list2 = list1.flatten
  println(s"list2 : $list2")

  // 4. Group the pairs by word.
  val list3 = list2.groupBy(_._1)
  println(s"list3 : $list3")

  /**
   * 5. Count: sum the weights of one group. `map` on a Map hands each entry
   * to the function as a single 2-tuple, hence the tuple parameter.
   *
   * @param tp a `(word, pairs)` entry produced by the grouping step
   * @return `(word, total)` where `total` is the sum of the second components of `pairs`
   */
  def sumRule(tp: (String, List[(String, Int)])): (String, Int) =
    (tp._1, tp._2.map(_._2).sum)

  val list4 = list3.map(sumRule _)
  println(list4)

  // 6. Sort by total, ascending.
  val list5 = list4.toList.sortBy(_._2)
  println(list5)
}