package com.bjsxt.scala.spark.operator
import org.apache.spark.{SparkContext, SparkConf}
/**
* Created by root on 2016/6/13.
*/
object CountByKeyOperator {

  /**
   * Demonstrates the `countByKey` action on a key-value RDD.
   *
   * `reduceByKey` vs `countByKey`:
   *  - Both group records by key.
   *  - `reduceByKey` applies a caller-supplied aggregation function to each
   *    group and is a transformation (lazy, returns an RDD).
   *  - `countByKey` needs no aggregation logic — it just counts the records
   *    per key — and is an action (triggers a job, returns a local Map to
   *    the driver).
   */
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf()
      .setAppName("CountByKeyOperator")
      .setMaster("local")
    val sc = new SparkContext(conf)
    try {
      // (decade, name) pairs: key = decade ("80s"/"70s"), value = student name.
      val studentList = Array(
        ("80s", "yulei"), ("80s", "fengqili"), ("80s", "gaohaitao"),
        ("70s", "wangfei"), ("70s", "xuruyun"), ("70s", "xuwei"))

      // students is a key-value RDD (k: decade, v: name).
      val students = sc.parallelize(studentList)

      // Expected result: Map("80s" -> 3, "70s" -> 3)
      val counts = students.countByKey()
      println(counts)
    } finally {
      // Always release the SparkContext, even if the job above throws.
      sc.stop()
    }
  }
}