Flink 窗口的底层 API
Flink 窗口的底层 API
使用窗口的复杂处理
process : flink 底层 API , 可以操作 flink 的时间,事件,状态
W <: Window -- Scala 泛型的上界(upper bound):W 必须是 Window 或其子类
package com.shujia.flink.window
import org.apache.flink.streaming.api.scala._
import org.apache.flink.streaming.api.scala.function.ProcessWindowFunction
import org.apache.flink.streaming.api.windowing.time.Time
import org.apache.flink.streaming.api.windowing.windows.TimeWindow
import org.apache.flink.util.Collector
object Demo4ProcessFunction {

  /**
   * Window function that emits one (word, window end timestamp, occurrences)
   * record each time it is invoked for a key and window.
   *
   * ProcessWindowFunction takes four type arguments, in order:
   *   IN  - input element type
   *   OUT - output element type
   *   KEY - key type
   *   W   - window type, declared as `W <: Window` (an upper bound: W must be
   *         Window or one of its subclasses)
   */
  private class WordCountPerWindow
    extends ProcessWindowFunction[(String, Int), (String, Long, Int), String, TimeWindow] {

    /**
     * @param key      the word this invocation is for
     * @param context  gives access to the window, used here for its end time
     * @param elements every (word, 1) pair of this key inside the window
     * @param out      collector used to send records downstream
     */
    override def process(key: String,
                         context: Context,
                         elements: Iterable[(String, Int)],
                         out: Collector[(String, Long, Int)]): Unit = {
      // The number of buffered pairs is the number of times the word occurred.
      out.collect((key, context.window.getEnd, elements.size))
    }
  }

  /**
   * Reads comma-separated words from the socket master:8888, groups them into
   * 5-second windows per word and prints (word, window end, count).
   */
  def main(args: Array[String]): Unit = {
    val env: StreamExecutionEnvironment = StreamExecutionEnvironment.getExecutionEnvironment

    // Split every incoming line on commas and pair each word with 1.
    val wordOnes: DataStream[(String, Int)] = env
      .socketTextStream("master", 8888)
      .flatMap(line => line.split(","))
      .map(word => (word, 1))

    // Key by the word so equal words land in the same window, then cut
    // the keyed stream into 5-second windows.
    val perWordWindows: WindowedStream[(String, Int), String, TimeWindow] = wordOnes
      .keyBy(pair => pair._1)
      .timeWindow(Time.seconds(5))

    // process is the low-level window API: the function sees all elements
    // of the window together with the window metadata.
    val countDS: DataStream[(String, Long, Int)] = perWordWindows.process(new WordCountPerWindow)

    countDS.print()
    env.execute()
  }
}
1、卡口过车需求案例
数据:json格式
{"car":"皖A9A7N2","city_code":"340500","county_code":"340522","card":117988031603010,"camera_id":"00120","orientation":"西南","road_id":34053114,"time":1614711895,"speed":36.38} {"car":"皖A9A7N2","city_code":"340500","county_code":"340522","card":117988031603010,"camera_id":"00120","orientation":"西南","road_id":34053114,"time":1614711904,"speed":35.38} {"car":"皖A9A7N2","city_code":"340500","county_code":"340522","card":117988031603010,"camera_id":"01220","orientation":"西南","road_id":34053114,"time":1614711914,"speed":45.38} {"car":"皖A9A7N2","city_code":"340500","county_code":"340522","card":117988031603010,"camera_id":"00210","orientation":"西北","road_id":34053114,"time":1614711924,"speed":45.29} {"car":"皖A9A7N2","city_code":"340500","county_code":"340522","card":117988031603010,"camera_id":"01214","orientation":"西北","road_id":34053114,"time":1614712022,"speed":75.29} {"car":"皖A9A7N2","city_code":"340500","county_code":"340522","card":117988031603010,"camera_id":"00032","orientation":"西北","road_id":34053114,"time":1614712120,"speed":46.29} {"car":"皖A9A7N2","city_code":"340500","county_code":"340522","card":117988031603010,"camera_id":"01014","orientation":"西北","road_id":34053114,"time":1614712218,"speed":82.29} {"car":"皖A9A7N2","city_code":"340500","county_code":"340522","card":117988031603010,"camera_id":"00104","orientation":"西北","road_id":34053114,"time":1614712316,"speed":82.29} {"car":"皖A9A7N2","city_code":"340500","county_code":"340522","card":117988031603010,"camera_id":"00111","orientation":"西北","road_id":34053114,"time":1614712414,"speed":48.5} {"car":"皖A9A7N2","city_code":"340500","county_code":"340522","card":117988031603010,"camera_id":"01124","orientation":"西北","road_id":34053114,"time":1614712619,"speed":59.5} …………
2、解析 json 格式的数据
通过工具
Gson -- 谷歌提供
fastutil
fastjson -- 阿里巴巴提供
……
导入 fastjson 依赖
<dependency>
<groupId>com.alibaba</groupId>
<artifactId>fastjson</artifactId>
<version>1.2.79</version>
</dependency>
fastJson解析json字符串
package com.shujia.flink.window
import java.lang
import com.alibaba.fastjson.{JSON, JSONObject}
object Demo6Json {

  /**
   * Parses a hard-coded JSON record with fastjson and prints three of its
   * fields (card, time, speed), one per line.
   */
  def main(args: Array[String]): Unit = {
    // The inner double quotes are escaped because the literal itself is
    // delimited by double quotes.
    val json = "{\"car\":\"皖A9A7N2\",\"city_code\":\"340500\",\"county_code\":\"340522\",\"card\":117988031603010,\"camera_id\":\"00012\",\"orientation\":\"西北\",\"road_id\":34053114,\"time\":1614714188,\"speed\":58.51}"

    // parseObject turns the JSON text into a JSONObject whose values can be
    // looked up by key.
    val record: JSONObject = JSON.parseObject(json)

    // Read each value through the typed getter for its key.
    val cardId: String = record.getString("card")
    val epochSeconds: Long = record.getLong("time")
    val speedValue: lang.Double = record.getDouble("speed")

    // Print the extracted values in the order card, time, speed.
    List[Any](cardId, epochSeconds, speedValue).foreach(println)
  }
}
3、实现需求
package com.shujia.flink.window
import java.lang
import com.alibaba.fastjson.{JSON, JSONObject}
import org.apache.flink.streaming.api.TimeCharacteristic
import org.apache.flink.streaming.api.functions.timestamps.BoundedOutOfOrdernessTimestampExtractor
import org.apache.flink.streaming.api.scala._
import org.apache.flink.streaming.api.scala.function.ProcessWindowFunction
import org.apache.flink.streaming.api.windowing.time.Time
import org.apache.flink.streaming.api.windowing.windows.TimeWindow
import org.apache.flink.util.Collector
object Demo5Car {

  /**
   * Reads checkpoint vehicle-passing records (one JSON object per line) from
   * the socket master:8888 and continuously reports road congestion
   * indicators per `card` value:
   *   1. the average speed over the most recent period
   *   2. the number of vehicles over the most recent period
   *
   * The statistics cover the last 10 minutes and are recomputed every minute
   * (sliding event-time window). Each printed record is
   * (card, window end timestamp, average speed, vehicle count).
   */
  def main(args: Array[String]): Unit = {
    val env: StreamExecutionEnvironment = StreamExecutionEnvironment.getExecutionEnvironment
    env.setParallelism(1)

    // Windows are driven by the timestamps carried in the records.
    env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime)

    // Raw checkpoint records, one JSON document per line.
    val linesDS: DataStream[String] = env.socketTextStream("master", 8888)

    // Parse each line into (card, event time in milliseconds, speed).
    // NOTE(review): a line that is not valid JSON, or that lacks "time" or
    // "speed", presumably fails here (null unboxing) -- confirm whether such
    // input can reach this job.
    val carDS: DataStream[(String, Long, Double)] = linesDS.map(line => {
      val jsonObj: JSONObject = JSON.parseObject(line)
      val card: String = jsonObj.getString("card")
      val time: Long = jsonObj.getLong("time")
      val speed: Double = jsonObj.getDouble("speed")
      // "time" is scaled by 1000 because the timestamp extractor below
      // expects milliseconds.
      (card, time * 1000, speed)
    })

    // Assign event timestamps and watermarks; the watermark trails the
    // largest timestamp seen by 5 seconds to tolerate out-of-order records.
    val assDS: DataStream[(String, Long, Double)] = carDS.assignTimestampsAndWatermarks(
      new BoundedOutOfOrdernessTimestampExtractor[(String, Long, Double)](Time.seconds(5)) {
        // The timestamp field must already be in milliseconds.
        override def extractTimestamp(element: (String, Long, Double)): Long = element._2
      }
    )

    // Group by the `card` field and evaluate the last 10 minutes every minute.
    val windowDS: WindowedStream[(String, Long, Double), String, TimeWindow] = assDS
      .keyBy(_._1)
      .timeWindow(Time.minutes(10), Time.minutes(1))

    val resultDS: DataStream[(String, Long, Double, Long)] = windowDS.process(
      new ProcessWindowFunction[(String, Long, Double), (String, Long, Double, Long), String, TimeWindow] {

        /**
         * Emits (key, window end, average speed, vehicle count) for one key
         * and one window.
         *
         * @param key      the `card` value the window belongs to
         * @param context  gives access to the window, used for its end time
         * @param elements all (card, time, speed) records of this key in the window
         * @param out      collector used to send the result downstream
         */
        override def process(key: String,
                             context: Context,
                             elements: Iterable[(String, Long, Double)],
                             out: Collector[(String, Long, Double, Long)]): Unit = {
          // Single pass: count the records and add up their speeds. The count
          // is a Long so it matches the output tuple without widening.
          val (num, sumSpeed) = elements.foldLeft((0L, 0.0)) {
            case ((count, total), (_, _, speed)) => (count + 1, total + speed)
          }

          val avgSpeed: Double = sumSpeed / num
          val endTime: Long = context.window.getEnd

          out.collect((key, endTime, avgSpeed, num))
        }
      })

    resultDS.print()
    env.execute()
  }
}

浙公网安备 33010602011771号