package com.shujia.spark.streaming
import org.apache.spark.SparkContext
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.SparkSession
import org.apache.spark.streaming.{Durations, StreamingContext}
import org.apache.spark.streaming.dstream.{DStream, ReceiverInputDStream}
/**
 * Demo: use DStream.transform to drop down to the RDD API inside a
 * Spark Streaming job. Each micro-batch RDD is word-counted with
 * flatMap/map/reduceByKey and the resulting RDD becomes the new DStream.
 *
 * Reads lines from a socket on master:8888; prints the per-batch
 * word counts to stdout every 5 seconds.
 */
object Demo3RDDToDS {
  def main(args: Array[String]): Unit = {
    // Local SparkSession; a single shuffle partition keeps demo output small.
    val spark: SparkSession = SparkSession
      .builder()
      .appName("streaming")
      .master("local[2]")
      .config("spark.sql.shuffle.partitions", 1)
      .getOrCreate()

    import spark.implicits._
    import org.apache.spark.sql.functions._

    val sparkContext: SparkContext = spark.sparkContext

    // Streaming context with a 5-second batch interval: how often a
    // micro-batch is formed and processed.
    val ssc = new StreamingContext(sparkContext, Durations.seconds(5))

    // Receiver-based source: one line of text per socket line.
    val lines: ReceiverInputDStream[String] = ssc.socketTextStream("master", 8888)

    // transform: exposes each batch as an RDD so plain RDD operations can
    // be applied; the RDD we return becomes the content of the new DStream.
    val wordCounts: DStream[(String, Int)] = lines.transform { batchRDD =>
      batchRDD
        .flatMap(line => line.split(","))
        .map(word => (word, 1))
        .reduceByKey((a, b) => a + b)
    }

    wordCounts.print()

    // Start the streaming job and block until it is terminated.
    ssc.start()
    ssc.awaitTermination()
    ssc.stop()
  }
}