Flink 窗口的底层 API

使用窗口的复杂处理

process : flink 底层 API , 可以操作 flink 的时间,事件,状态

W <: Window -- Scala 泛型的上界(upper bound)限定,W 只能是 Window 或者 Window 的子类

package com.shujia.flink.window

import org.apache.flink.streaming.api.scala._
import org.apache.flink.streaming.api.scala.function.ProcessWindowFunction
import org.apache.flink.streaming.api.windowing.time.Time
import org.apache.flink.streaming.api.windowing.windows.TimeWindow
import org.apache.flink.util.Collector

/**
  * Socket word count built on Flink's low-level window `process` API.
  *
  * Reads comma-separated words from a socket, groups them by word into
  * 5-second time windows and prints (word, window end time, count) for each
  * key/window pair.
  */
object Demo4ProcessFunction {

  /**
    * Counts the records of one key inside one window.
    *
    * ProcessWindowFunction[IN, OUT, KEY, W <: Window] needs four type parameters:
    *   - IN  : input element type
    *   - OUT : output element type
    *   - KEY : key type
    *   - W   : window type; `W <: Window` is an upper type bound, so W must be
    *           Window or one of its subtypes
    */
  private final class WordCountPerWindow
    extends ProcessWindowFunction[(String, Int), (String, Long, Int), String, TimeWindow] {

    /**
      * Invoked once for every window of every key.
      *
      * @param key      the word this window belongs to
      * @param context  window context, used here to read the window end time
      * @param elements all records of this key that fell into the window
      * @param out      collector used to send results downstream
      */
    override def process(key: String,
                         context: Context,
                         elements: Iterable[(String, Int)],
                         out: Collector[(String, Long, Int)]): Unit = {
      // Number of occurrences of the word inside this window.
      val occurrences: Int = elements.foldLeft(0)((seen, _) => seen + 1)

      // Emit (word, window end time, occurrences).
      out.collect((key, context.window.getEnd, occurrences))
    }
  }

  def main(args: Array[String]): Unit = {
    val streamEnv: StreamExecutionEnvironment = StreamExecutionEnvironment.getExecutionEnvironment

    // Every comma-separated token read from the socket becomes a (word, 1) pair.
    val wordOnes: DataStream[(String, Int)] = streamEnv
      .socketTextStream("master", 8888)
      .flatMap(line => line.split(","))
      .map(word => (word, 1))

    // Key by the word, then cut each key's records into 5-second windows.
    val wordWindows: WindowedStream[(String, Int), String, TimeWindow] = wordOnes
      .keyBy(pair => pair._1)
      .timeWindow(Time.seconds(5))

    // process: Flink's low-level API with access to time, events and state.
    val countsPerWindow: DataStream[(String, Long, Int)] =
      wordWindows.process(new WordCountPerWindow)

    countsPerWindow.print()

    streamEnv.execute()
  }
}

1、卡口过车需求案例

数据:json格式

{"car":"皖A9A7N2","city_code":"340500","county_code":"340522","card":117988031603010,"camera_id":"00120","orientation":"西南","road_id":34053114,"time":1614711895,"speed":36.38}
{"car":"皖A9A7N2","city_code":"340500","county_code":"340522","card":117988031603010,"camera_id":"00120","orientation":"西南","road_id":34053114,"time":1614711904,"speed":35.38}
{"car":"皖A9A7N2","city_code":"340500","county_code":"340522","card":117988031603010,"camera_id":"01220","orientation":"西南","road_id":34053114,"time":1614711914,"speed":45.38}
{"car":"皖A9A7N2","city_code":"340500","county_code":"340522","card":117988031603010,"camera_id":"00210","orientation":"西北","road_id":34053114,"time":1614711924,"speed":45.29}
{"car":"皖A9A7N2","city_code":"340500","county_code":"340522","card":117988031603010,"camera_id":"01214","orientation":"西北","road_id":34053114,"time":1614712022,"speed":75.29}
{"car":"皖A9A7N2","city_code":"340500","county_code":"340522","card":117988031603010,"camera_id":"00032","orientation":"西北","road_id":34053114,"time":1614712120,"speed":46.29}
{"car":"皖A9A7N2","city_code":"340500","county_code":"340522","card":117988031603010,"camera_id":"01014","orientation":"西北","road_id":34053114,"time":1614712218,"speed":82.29}
{"car":"皖A9A7N2","city_code":"340500","county_code":"340522","card":117988031603010,"camera_id":"00104","orientation":"西北","road_id":34053114,"time":1614712316,"speed":82.29}
{"car":"皖A9A7N2","city_code":"340500","county_code":"340522","card":117988031603010,"camera_id":"00111","orientation":"西北","road_id":34053114,"time":1614712414,"speed":48.5}
{"car":"皖A9A7N2","city_code":"340500","county_code":"340522","card":117988031603010,"camera_id":"01124","orientation":"西北","road_id":34053114,"time":1614712619,"speed":59.5}
…………

2、解析 json 格式的数据

通过工具

Gson -- 谷歌提供

fastutil(注意:fastutil 是高性能集合类库,并不是 JSON 解析工具;常用的 JSON 解析库还有 Jackson)

fastjson -- 阿里巴巴提供(开源)

……

导入 fastjson 依赖
        <dependency>
            <groupId>com.alibaba</groupId>
            <artifactId>fastjson</artifactId>
            <version>1.2.79</version>
        </dependency>
fastJson解析json字符串
package com.shujia.flink.window

import java.lang

import com.alibaba.fastjson.{JSON, JSONObject}

/**
  * Minimal fastjson example: parse one JSON record and read a few fields by key.
  */
object Demo6Json {
  def main(args: Array[String]): Unit = {

    // The literal is delimited by double quotes, so the quotes inside the JSON are escaped.
    val record = "{\"car\":\"皖A9A7N2\",\"city_code\":\"340500\",\"county_code\":\"340522\",\"card\":117988031603010,\"camera_id\":\"00012\",\"orientation\":\"西北\",\"road_id\":34053114,\"time\":1614714188,\"speed\":58.51}"

    // parseObject turns the JSON text into a JSONObject whose values are looked up by key.
    val parsed: JSONObject = JSON.parseObject(record)

    // Read each field through the typed getter for the wanted result type.
    val card: String = parsed.getString("card")
    val time: Long = parsed.getLong("time")
    val speed: lang.Double = parsed.getDouble("speed")

    // Print the three values, one per line, in the order card, time, speed.
    Seq[Any](card, time, speed).foreach(println)
  }
}

3、实现需求

package com.shujia.flink.window

import java.lang

import com.alibaba.fastjson.{JSON, JSONObject}
import org.apache.flink.streaming.api.TimeCharacteristic
import org.apache.flink.streaming.api.functions.timestamps.BoundedOutOfOrdernessTimestampExtractor
import org.apache.flink.streaming.api.scala._
import org.apache.flink.streaming.api.scala.function.ProcessWindowFunction
import org.apache.flink.streaming.api.windowing.time.Time
import org.apache.flink.streaming.api.windowing.windows.TimeWindow
import org.apache.flink.util.Collector

/**
  * Real-time road congestion statistics computed from checkpoint pass records.
  *
  * Congestion is judged from two figures over the recent past:
  *   1. the average vehicle speed
  *   2. the traffic volume (number of pass records)
  *
  * Both are computed over the last 10 minutes and refreshed every minute.
  */
object Demo5Car {

  def main(args: Array[String]): Unit = {

    val env: StreamExecutionEnvironment = StreamExecutionEnvironment.getExecutionEnvironment

    env.setParallelism(1)

    // Windows are driven by the timestamp carried in each record (event time).
    env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime)

    // Read the pass records, one JSON document per line.
    val linesDS: DataStream[String] = env.socketTextStream("master", 8888)

    // Parse each JSON line into (card, event time in milliseconds, speed).
    // NOTE(review): a blank or malformed line presumably makes this map throw and
    // fail the job - confirm whether bad records should be dropped instead.
    val carDS: DataStream[(String, Long, Double)] = linesDS.map(line => {
      val jsonObj: JSONObject = JSON.parseObject(line)

      // Read the fields directly by key.
      val card: String = jsonObj.getString("card")
      val time: Long = jsonObj.getLong("time")
      val speed: Double = jsonObj.getDouble("speed")

      // Scale `time` by 1000 so that it is expressed in milliseconds.
      (card, time * 1000, speed)
    })

    // Assign the event-time field and the watermark.
    val assDS: DataStream[(String, Long, Double)] = carDS.assignTimestampsAndWatermarks(
      // The watermark is held back by 5 seconds to tolerate out-of-order records.
      new BoundedOutOfOrdernessTimestampExtractor[(String, Long, Double)](Time.seconds(5)) {
        // The extracted timestamp must be in milliseconds.
        override def extractTimestamp(element: (String, Long, Double)): Long = element._2
      }
    )

    // Sliding window: the last 10 minutes of data, evaluated every minute.
    val windowDS: WindowedStream[(String, Long, Double), String, TimeWindow] = assDS
      // Group by checkpoint (the `card` field).
      .keyBy(_._1)
      .timeWindow(Time.minutes(10), Time.minutes(1))

    // Output: (checkpoint, window end time, average speed, number of records).
    val resultDS: DataStream[(String, Long, Double, Long)] = windowDS.process(new ProcessWindowFunction[(String, Long, Double), (String, Long, Double, Long), String, TimeWindow] {

      /**
        * Aggregates all records of one checkpoint inside one window.
        *
        * @param key      the checkpoint the window belongs to
        * @param context  window context, used here to read the window end time
        * @param elements the (card, time, speed) records of this key in the window
        * @param out      collector used to send the result downstream
        */
      override def process(key: String,
                           context: Context,
                           elements: Iterable[(String, Long, Double)],
                           out: Collector[(String, Long, Double, Long)]): Unit = {

        // One pass over the window: count the records and add up their speeds.
        // The counter is a Long because the declared output field is a Long.
        val (num, sumSpeed) = elements.foldLeft((0L, 0.0)) {
          case ((seen, total), (_, _, speed)) => (seen + 1, total + speed)
        }

        // Average speed over the window.
        val avgSpeed: Double = sumSpeed / num

        // End time of the window.
        val endTime: Long = context.window.getEnd

        // Send the result downstream.
        out.collect((key, endTime, avgSpeed, num))
      }
    })

    resultDS.print()

    env.execute()

  }
}
posted @ 2022-03-22 15:34  阿伟宝座  阅读(97)  评论(0)    收藏  举报