##### 2. Start the Services

Run on every node:

```shell
./kafka-server-start.sh -daemon ../config/server.properties
```

Create a topic:

```shell
[root@node1 bin]# ./kafka-topics.sh --create --zookeeper 192.168.182.147:2181 --replication-factor 2 --partitions 1 --topic test
```

List topics:

```shell
[root@hdp1 bin]# ./kafka-topics.sh --list --zookeeper 192.168.182.147:2181
```

Start a console producer (a single broker address works; listing several gives failover):

```shell
[root@hdp1 bin]# ./kafka-console-producer.sh --broker-list 192.168.182.146:9092 --topic test
./kafka-console-producer.sh --broker-list 192.168.182.147:9092,192.168.182.148:9092,192.168.182.149:9092 --topic test
```

Start a console consumer:

```shell
[root@hdp2 bin]# ./kafka-console-consumer.sh --bootstrap-server 192.168.182.147:9092 --topic test --from-beginning
```

Describe the topic:

```shell
[root@hdp1 bin]# ./kafka-topics.sh --describe --zookeeper 192.168.182.146:2181 --topic test
```
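Note: the `--zookeeper` flag used above applies to older Kafka releases. From Kafka 2.2 onward, `kafka-topics.sh` can talk to the brokers directly via `--bootstrap-server`, and recent versions no longer accept `--zookeeper` at all. A sketch of the equivalent commands, assuming the same broker addresses as above:

```shell
# Kafka 2.2+: topic administration goes through the brokers, not ZooKeeper
./kafka-topics.sh --create --bootstrap-server 192.168.182.147:9092 --replication-factor 2 --partitions 1 --topic test
./kafka-topics.sh --list --bootstrap-server 192.168.182.147:9092
./kafka-topics.sh --describe --bootstrap-server 192.168.182.147:9092 --topic test
```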
##### 3. Spark Streaming: Consume Kafka and Count Words into MySQL

A stream-processing job (StreamWC) that consumes the `test` topic, counts words per 5-second batch, and writes the results to MySQL:

```scala
package com.bawei.foryk

import java.sql.DriverManager

import org.apache.kafka.clients.consumer.ConsumerRecord
import org.apache.kafka.common.serialization.StringDeserializer
import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.streaming.{Seconds, StreamingContext}
import org.apache.spark.streaming.dstream.{DStream, InputDStream}
import org.apache.spark.streaming.kafka010.ConsumerStrategies.Subscribe
import org.apache.spark.streaming.kafka010.KafkaUtils
import org.apache.spark.streaming.kafka010.LocationStrategies.PreferConsistent

object SparkStreaming_Kafka_Mysql_StreamWC {
  def main(args: Array[String]): Unit = {
    val sparkConf: SparkConf = new SparkConf()
      .setAppName("SparkStreaming_Kafka_Mysql_StreamWC")
      .setMaster("local[2]")
    val sparkContext = new SparkContext(sparkConf)
    //sparkContext.setLogLevel("WARN")
    val ssc = new StreamingContext(sparkContext, Seconds(5))

    // Kafka consumer configuration
    val kafkaParams = Map[String, Object](
      "bootstrap.servers" -> "hadoop101:9092,hadoop102:9092,hadoop103:9092",
      "key.deserializer" -> classOf[StringDeserializer],
      "value.deserializer" -> classOf[StringDeserializer],
      "group.id" -> "group1"
    )

    // Topics to subscribe to; a Set, so several topics can be listed
    val topics = Set("test")

    // Build the input DStream with KafkaUtils.createDirectStream
    val kafkaTopicDS: InputDStream[ConsumerRecord[String, String]] =
      KafkaUtils.createDirectStream(ssc, PreferConsistent, Subscribe[String, String](topics, kafkaParams))

    // Keep only the message value of each Kafka record
    val socketline: DStream[String] = kafkaTopicDS.map(x => x.value())

    // Split each line into words and map every word to (word, 1)
    val mapRDD: DStream[(String, Int)] = socketline.flatMap(_.split(" ")).map((_, 1))

    // Sum the counts per word; instead of printing, persist each batch to MySQL
    //mapRDD.reduceByKey(_ + _).print()
    mapRDD.reduceByKey(_ + _).foreachRDD(rdd => {
      rdd.foreachPartition(partition => {
        // One JDBC connection per partition, reused for all rows in it
        val conn = DriverManager.getConnection("jdbc:mysql://localhost:3306/test", "root", "123")
        // Parameterized statement: safe against SQL injection, reusable per row
        val stmt = conn.prepareStatement("insert into wordcount (word, count) values (?, ?)")
        partition.foreach { case (word, count) =>
          stmt.setString(1, word)
          stmt.setInt(2, count)
          stmt.executeUpdate()
        }
        stmt.close()
        conn.close()
        println("Batch written to MySQL")
      })
    })

    ssc.start()
    ssc.awaitTermination()
  }
}
```

The JDBC work is done inside `foreachPartition` rather than per record: opening a connection for every tuple is expensive, and `DriverManager.getConnection` is not serializable, so it must be created on the executor side.
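The job assumes a `wordcount` table already exists in the `test` database. A minimal sketch of matching DDL, inferred from the insert statement above (column types are assumptions):

```shell
# Hypothetical schema matching the job's insert; adjust types/lengths as needed
mysql -u root -p123 -e "
CREATE DATABASE IF NOT EXISTS test;
CREATE TABLE IF NOT EXISTS test.wordcount (
    word  VARCHAR(255) NOT NULL,
    count INT          NOT NULL
);"
```

Running the job also requires the Kafka integration and the MySQL JDBC driver on the classpath. A sketch of a submit command; the artifact versions and jar name here are assumptions and must match your Spark/Scala build:

```shell
# streamwc.jar is a placeholder for your assembled application jar
spark-submit \
  --class com.bawei.foryk.SparkStreaming_Kafka_Mysql_StreamWC \
  --packages org.apache.spark:spark-streaming-kafka-0-10_2.11:2.4.8,mysql:mysql-connector-java:5.1.49 \
  streamwc.jar
```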