Spark Streaming integration with Kafka
Versions used: Spark 2.2.0, Kafka 0.10.0.0
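The demo below uses the spark-streaming-kafka-0-10 direct stream API to consume a Kafka topic and print each record's value in 5-second micro-batches. As a sketch of the build setup, assuming sbt and the Scala 2.11 builds of Spark 2.2.0 (adjust artifact suffixes and versions to your environment):

// build.sbt (sketch, assumed build tool)
scalaVersion := "2.11.8"

libraryDependencies ++= Seq(
  "org.apache.spark" %% "spark-core"                 % "2.2.0",
  "org.apache.spark" %% "spark-sql"                  % "2.2.0",
  "org.apache.spark" %% "spark-hive"                 % "2.2.0",  // needed for enableHiveSupport()
  "org.apache.spark" %% "spark-streaming"            % "2.2.0",
  "org.apache.spark" %% "spark-streaming-kafka-0-10" % "2.2.0"   // Kafka 0.10+ integration
)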
import org.apache.kafka.clients.consumer.ConsumerRecord
import org.apache.kafka.common.serialization.StringDeserializer
import org.apache.log4j.{Level, Logger}
import org.apache.spark.SparkContext
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.SparkSession
import org.apache.spark.streaming.dstream.InputDStream
import org.apache.spark.streaming.kafka010.{ConsumerStrategies, KafkaUtils, LocationStrategies}
import org.apache.spark.streaming.{Seconds, StreamingContext}

object StreamingDemo {

  def main(args: Array[String]): Unit = {
    // Quiet down the noisier loggers so batch output stays readable on the console
    Logger.getLogger("org.apache.spark").setLevel(Level.WARN)
    Logger.getLogger("org.eclipse.jetty.server").setLevel(Level.WARN)
    Logger.getLogger("org.apache.kafka.clients.consumer").setLevel(Level.WARN)

    // Use the HDFS URI directly; wrapping it in java.io.File would mangle the scheme
    val warehouseLocation = "hdfs://user/hive/warehouse"
    val bootstrapServers = "192.168.156.111:9092"  // comma-separated list when there are several brokers

    val spark: SparkSession = SparkSession
      .builder()
      .appName("Spark SQL To Hive")
      .master("local[4]")
      .config("spark.sql.warehouse.dir", warehouseLocation)
      // Streaming tuning has to be on the SparkConf before the StreamingContext is created;
      // calling spark.conf.set() afterwards only updates the SQL runtime config
      .config("spark.streaming.concurrentJobs", "10")
      .config("spark.streaming.kafka.maxRetries", "50")
      .config("spark.streaming.stopGracefullyOnShutdown", "true")
      .config("spark.streaming.backpressure.enabled", "true")
      .config("spark.streaming.backpressure.initialRate", "5000")
      .config("spark.streaming.kafka.maxRatePerPartition", "3000")
      .enableHiveSupport()
      .getOrCreate()

    val sc: SparkContext = spark.sparkContext
    // 5-second micro-batches
    val ssc: StreamingContext = new StreamingContext(sc, Seconds(5))

    // Kafka consumer parameters
    val kafkaParams = Map[String, Object](
      "bootstrap.servers" -> bootstrapServers,
      "key.deserializer" -> classOf[StringDeserializer],
      "value.deserializer" -> classOf[StringDeserializer],
      "group.id" -> "test-consumer-group",
      "auto.offset.reset" -> "latest",
      "enable.auto.commit" -> (true: java.lang.Boolean)
    )

    val topics = Array("test")
    val stream: InputDStream[ConsumerRecord[String, String]] =
      KafkaUtils.createDirectStream[String, String](
        ssc,
        LocationStrategies.PreferConsistent,
        ConsumerStrategies.Subscribe[String, String](topics, kafkaParams)
      )

    // For every micro-batch, pull out the message values and print them
    stream.foreachRDD { rdd =>
      val values: RDD[String] = rdd.map(_.value())
      values.foreach(println)
    }

    ssc.start()
    ssc.awaitTermination()
  }
}
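With "enable.auto.commit" set to true, the Kafka consumer commits offsets on its own schedule, regardless of whether the batch was actually processed. A common alternative with the 0-10 integration is to disable auto-commit and commit offsets yourself after each successful batch; a minimal sketch (the processing line is a placeholder):

import org.apache.spark.streaming.kafka010.{CanCommitOffsets, HasOffsetRanges}

// Requires "enable.auto.commit" -> (false: java.lang.Boolean) in kafkaParams
stream.foreachRDD { rdd =>
  // Capture this batch's offset ranges before any transformation
  val offsetRanges = rdd.asInstanceOf[HasOffsetRanges].offsetRanges

  rdd.map(_.value()).foreach(println)  // process the batch (placeholder)

  // Commit offsets back to Kafka only after the batch has been processed
  stream.asInstanceOf[CanCommitOffsets].commitAsync(offsetRanges)
}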
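The demo enables Hive support and sets a warehouse location but only prints the values. If the intent of "Spark SQL To Hive" is to land each batch in a Hive table, the foreachRDD body could convert the batch to a DataFrame and append it; a sketch under that assumption (the table name test_kafka_msg is purely illustrative):

stream.foreachRDD { rdd =>
  if (!rdd.isEmpty()) {
    import spark.implicits._

    // Turn the batch of Kafka message values into a single-column DataFrame
    val df = rdd.map(_.value()).toDF("value")

    // Append the batch into a Hive table (hypothetical table name)
    df.write.mode("append").saveAsTable("test_kafka_msg")
  }
}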
