Spark Streaming uses the direct approach (createDirectStream) to read data from Kafka.
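The listing below uses the Spark 1.x Kafka 0.8 direct integration. As a rough guide (the exact Spark version is an assumption, not stated here), the sbt dependency would look like:

libraryDependencies += "org.apache.spark" %% "spark-streaming-kafka" % "1.6.0"  // assumed Spark 1.x version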

package com.xing.stream

import kafka.serializer.StringDecoder
import org.apache.spark.SparkConf
import org.apache.spark.streaming.kafka.KafkaUtils
import org.apache.spark.streaming.{Seconds, StreamingContext}

/**
  * Created by DengNi on 2016/12/16.
  */

object StreamingFirst {

  def main(args: Array[String]): Unit = {

    // no spaces in the broker list, or the hosts may not parse cleanly
    val brokers = "192.168.184.188:9092,192.168.184.178:9092,192.168.184.168:9092"
    val topics = "meinv"


    val sparkConf = new SparkConf().setAppName("kafkastreaming").setMaster("local[2]")
    val ssc = new StreamingContext(sparkConf, Seconds(6))

    ssc.checkpoint("w_checkpoints")  // checkpoint directory (a relative Windows path here; see the recovery sketch below the listing)

    val topicSet = topics.split(",").toSet
    val kafkaParams = Map[String, String]("metadata.broker.list" -> brokers)


    //{"@timestamp":"2016-12-14T16:26:21.746Z","beat":{"hostname":"root","name":"root","version":"5.1.1"},"metricset":{"module":"system","name":"process","rtt":28025},"system":{"process":{"cmdline":"\"C:\\WINDOWS\\system32\\SearchFilterHost.exe\" 0 624 628 644 8192 632 ","cpu":{"start_time":"2016-12-14T16:24:15.240Z","total":{"pct":0.000000}},"memory":{"rss":{"bytes":7495680,"pct":0.000400},"share":0,"size":1806336},"name":"SearchFilterHost.exe","pgid":0,"pid":8776,"ppid":2524,"state":"running","username":"NT AUTHORITY\\SYSTEM"}},"type":"metricsets"}
    val lines = KafkaUtils.createDirectStream[String, String,StringDecoder, StringDecoder](ssc,kafkaParams,topicSet)
    //val message = lines.map(_._1) map(_._1)  数据是空的 null
    val message = lines.map(_._2) //map(_._2)  才是Kafka里面打入的数据
    val words = message.flatMap(_.split(":"))

    val wordCounts = words.map(x => (x, 1L)).reduceByKey(_ + _)
    wordCounts.print()
    //message.print()  // verified: prints the raw messages
    
    ssc.start()
    ssc.awaitTermination()


  }

}
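Because the job checkpoints to w_checkpoints, the driver can be restarted without losing its place in the stream. A minimal recovery sketch using Spark's StreamingContext.getOrCreate (CheckpointRecovery is a hypothetical name, and a single broker is used for brevity):

import kafka.serializer.StringDecoder
import org.apache.spark.SparkConf
import org.apache.spark.streaming.kafka.KafkaUtils
import org.apache.spark.streaming.{Seconds, StreamingContext}

object CheckpointRecovery {
  // Factory used only when no checkpoint exists yet
  def createContext(): StreamingContext = {
    val conf = new SparkConf().setAppName("kafkastreaming").setMaster("local[2]")
    val ssc = new StreamingContext(conf, Seconds(6))
    ssc.checkpoint("w_checkpoints")
    val kafkaParams = Map("metadata.broker.list" -> "192.168.184.188:9092")
    val lines = KafkaUtils.createDirectStream[String, String, StringDecoder, StringDecoder](
      ssc, kafkaParams, Set("meinv"))
    lines.map(_._2).print()  // the DStream graph must be defined inside the factory
    ssc
  }

  def main(args: Array[String]): Unit = {
    // Restores the context (and its progress) from the checkpoint after a restart;
    // otherwise calls createContext to build a fresh one
    val ssc = StreamingContext.getOrCreate("w_checkpoints", createContext _)
    ssc.start()
    ssc.awaitTermination()
  }
}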



For the program that writes the data into Kafka, see http://blog.csdn.net/haohaixingyun/article/details/53647963
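As a rough companion sketch (this is not the linked program; the object name is hypothetical, and the broker list and topic are reused from above), a minimal producer using the old Kafka 0.8 Scala API could look like:

import java.util.Properties
import kafka.producer.{KeyedMessage, Producer, ProducerConfig}

object ProducerSketch {  // hypothetical name, for illustration only
  def main(args: Array[String]): Unit = {
    val props = new Properties()
    props.put("metadata.broker.list", "192.168.184.188:9092,192.168.184.178:9092,192.168.184.168:9092")
    props.put("serializer.class", "kafka.serializer.StringEncoder")

    val producer = new Producer[String, String](new ProducerConfig(props))
    // Send one test message to the topic the streaming job reads from
    producer.send(new KeyedMessage[String, String]("meinv", """{"hello":"kafka"}"""))
    producer.close()
  }
}

Any message sent this way should show up in the wordCounts output within one 6-second batch.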
