package com.xujunqi.zuoye

import java.util.Properties

import org.apache.flink.api.common.serialization.SimpleStringSchema
import org.apache.flink.streaming.api.scala._
import org.apache.flink.streaming.api.windowing.time.Time
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer011

/**
 * Flink streaming job that reads space-separated license plates from the Kafka
 * topic `test` and prints three windowed statistics to the console:
 *
 *  1. the total number of plates seen in each 5-second window;
 *  2. per-plate counts, in each 5-second window, of plates whose last character
 *     is an even digit;
 *  3. plates that occur at least twice within a 10-second window.
 */
object Licenseplate_Flink_Kafka {

  /**
   * Tells whether a plate ends with an even decimal digit.
   *
   * Unlike `substring(length - 1).toInt`, this never throws: an empty plate or a
   * plate ending in a non-digit character simply yields `false`, so one
   * malformed record cannot fail the whole streaming job.
   *
   * @param plate a single plate token
   * @return `true` only when the last character is a digit whose value is even
   */
  private def endsWithEvenDigit(plate: String): Boolean =
    plate.lastOption.exists(c => Character.isDigit(c) && Character.digit(c, 10) % 2 == 0)

  /**
   * Entry point: builds the three pipelines and runs the job.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    // Set up the execution environment.
    val env = StreamExecutionEnvironment.getExecutionEnvironment
    // Global parallelism of 1.
    env.setParallelism(1)

    // Kafka source configuration.
    val properties = new Properties()
    // Broker host and port.
    properties.setProperty("bootstrap.servers", "hadoop102:9092")
    // Consumer group.
    properties.setProperty("group.id", "car")
    // Key / value deserializers.
    // NOTE(review): the Flink Kafka consumer is believed to install its own
    // byte-array deserializers and ignore these two settings — confirm before removing.
    properties.setProperty("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer")
    properties.setProperty("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer")
    // Where to start when the group has no committed offset.
    properties.setProperty("auto.offset.reset", "latest")

    // Raw lines from the topic.
    val stream: DataStream[String] =
      env.addSource(new FlinkKafkaConsumer011[String]("test", new SimpleStringSchema(), properties))

    // Tokenise once and share the result between the three pipelines.
    // Consecutive spaces make split(" ") emit empty tokens; they are dropped here
    // so they are neither counted as plates nor passed to the tail-digit check.
    val plates: DataStream[String] = stream
      .flatMap(_.split(" "))
      .map(_.trim)
      .filter(_.nonEmpty)

    // Total number of plates seen in each 5-second window.
    plates
      .map(_ => ("car", 1))
      .keyBy(_._1)
      .timeWindow(Time.seconds(5))
      .sum(1)
      .print("5s内车牌出现的总次数")

    // Plates whose last character is an even digit, per 5-second window.
    plates
      .filter(plate => endsWithEvenDigit(plate))
      .map(plate => (plate, 1))
      .keyBy(_._1)
      .timeWindow(Time.seconds(5))
      .sum(1)
      .print("5s内所有车牌尾号是偶数的车牌号码")

    // Plates occurring at least twice within a 10-second window.
    plates
      .map(plate => (plate, 1))
      .keyBy(_._1)
      .timeWindow(Time.seconds(10))
      .sum(1)
      .filter(_._2 >= 2)
      .print("10s内同一车牌出现次数超过2次及以上的车牌号码")

    env.execute()
  }
}