import org.apache.spark.rdd.RDD
import org.apache.spark.{SparkConf, SparkContext}
/**
 * Word-count job on Spark RDDs, written with named helper methods instead of
 * inline lambdas.
 *
 * Each helper below is handed to an RDD operator by name: what gets passed to
 * Spark is the rule (a function), not data.
 */
object WordCountPlus {

  /** Input glob read when no path is supplied on the command line. */
  private val DefaultInput = "input/*.txt"

  /**
   * Reads text files, counts the occurrences of each space-separated word and
   * prints every `(word, count)` pair to standard output.
   *
   * @param args optional; `args(0)` overrides the default input path `input/*.txt`
   */
  def main(args: Array[String]): Unit = {
    // setIfMissing keeps "local" as the default while letting an externally
    // configured master (e.g. via spark-submit) take effect.
    val sparkConf = new SparkConf()
      .setAppName("WordCountPlus")
      .setIfMissing("spark.master", "local")
    val sc: SparkContext = new SparkContext(sparkConf)
    try {
      val inputPath = args.headOption.getOrElse(DefaultInput)
      val lines: RDD[String] = sc.textFile(inputPath)
      // line -> words
      val word: RDD[String] = lines.flatMap(flatMap)
      // word -> (word, 1)
      val word2One: RDD[(String, Int)] = word.map(map)
      // (word, 1) pairs grouped under their word
      val grouped: RDD[(String, Iterable[(String, Int)])] = word2One.groupBy(groupBy)
      // each group collapsed to (word, total)
      val wordCount: RDD[(String, Int)] = grouped.map(mapAndReduce)
      wordCount.collect().foreach(println)
    } finally {
      // Always release the context, even when the job fails.
      sc.stop()
    }
  }

  /**
   * Collapses one group into a single `(word, total)` pair.
   *
   * @param word the grouping key together with all `(word, count)` pairs of that key
   * @return the key paired with the sum of the counts; 0 for an empty group
   */
  def mapAndReduce(word: (String, Iterable[(String, Int)])): (String, Int) = {
    val (key, pairs) = word
    // reduceOption avoids the exception that reduce throws on an empty group.
    (key, pairs.reduceOption(reduce).fold(0)(_._2))
  }

  /**
   * Merges two `(word, count)` pairs by adding their counts.
   *
   * @param tuple2_1 first pair; its word is kept in the result
   * @param tuple2_2 second pair; only its count is used
   * @return `(tuple2_1._1, tuple2_1._2 + tuple2_2._2)`
   */
  def reduce(tuple2_1: (String, Int), tuple2_2: (String, Int)): (String, Int) =
    (tuple2_1._1, tuple2_1._2 + tuple2_2._2)

  /**
   * Grouping rule: selects the word of a `(word, count)` pair as the key.
   *
   * @param word a `(word, count)` pair
   * @return the word component
   */
  def groupBy(word: (String, Int)): String = word._1

  /**
   * Pairs a word with an initial count of 1.
   *
   * @param word the word to tag
   * @return `(word, 1)`
   */
  def map(word: String): (String, Int) = (word, 1)

  /**
   * Splits a line into words on single spaces.
   *
   * Empty tokens (from blank lines or leading, trailing and repeated spaces)
   * are dropped so that the empty string is never counted as a word.
   *
   * @param lines one line of input text
   * @return the non-empty space-separated tokens of the line
   */
  def flatMap(lines: String): Array[String] =
    lines.split(" ").filter(_.nonEmpty)
}