基于 DataStream API 实现欺诈检测、Flink的定时器
基于 DataStream API 实现欺诈检测
实时场景
Flink的定时器
package com.shujia.flink.core
import org.apache.flink.api.common.functions.RuntimeContext
import org.apache.flink.api.common.state.{ValueState, ValueStateDescriptor}
import org.apache.flink.configuration.Configuration
import org.apache.flink.streaming.api.TimerService
import org.apache.flink.streaming.api.functions.KeyedProcessFunction
import org.apache.flink.streaming.api.scala._
import org.apache.flink.util.Collector
/**
  * Fraud-detection demo built on the Flink DataStream API.
  *
  * Reads lines of the form `userId,amount` from a socket, keys the stream by user and emits
  * `(userId, smallAmount, largeAmount)` when a transaction below [[Demo6Fraud.SmallAmount]] is
  * immediately followed, within one minute of processing time, by a transaction above
  * [[Demo6Fraud.LargeAmount]].
  */
object Demo6Fraud {

  // A transaction strictly below this amount arms the detector for its key
  private val SmallAmount: Double = 1.0

  // A transaction strictly above this amount, directly after a small one, raises an alert
  private val LargeAmount: Double = 500

  // How long (processing time, in milliseconds) a small transaction stays "pending"
  private val PendingTimeoutMs: Long = 60000

  def main(args: Array[String]): Unit = {
    /*
    Sample input:
    userId,amount
    1001,123
    1001,23
    1001,1
    1001,1000
    1001,300
    1002,2
    1002,0.5
    1002,200
    1002,0.5
    1002,6000
    */

    // Create the Flink execution environment
    val env: StreamExecutionEnvironment = StreamExecutionEnvironment.getExecutionEnvironment

    // Read the raw lines from a socket
    val eventDS: DataStream[String] = env.socketTextStream("master", 8888)

    // Parse each line into (userId, amount).
    // NOTE(review): a line without a comma or with a non-numeric amount throws here and fails the job.
    val kvDS: DataStream[(String, Double)] = eventDS.map(line => {
      val split: Array[String] = line.split(",")
      (split(0), split(1).toDouble)
    })

    // Group by user id so that state and timers are scoped per user
    val keyByDS: KeyedStream[(String, Double), String] = kvDS.keyBy(_._1)

    // Fraud check. Output: (userId, previous small amount, current large amount).
    // Both transactions must happen within one minute for an alert to be emitted.
    val filterDS: DataStream[(String, Double, Double)] = keyByDS.process(
      new KeyedProcessFunction[String, (String, Double), (String, Double, Double)] {

        // Amount of the pending small transaction for the current key; null when none is pending.
        // A boxed type is used so that "nothing pending" is distinguishable from an amount of 0.
        private var smallAmountState: ValueState[java.lang.Double] = _

        // Timestamp of the timer registered for the pending small transaction; null when none
        private var timerState: ValueState[java.lang.Long] = _

        override def open(parameters: Configuration): Unit = {
          val context: RuntimeContext = getRuntimeContext
          smallAmountState = context.getState(
            new ValueStateDescriptor[java.lang.Double]("money", classOf[java.lang.Double]))
          timerState = context.getState(
            new ValueStateDescriptor[java.lang.Long]("timer", classOf[java.lang.Long]))
        }

        override def processElement(
                                     value: (String, Double),
                                     ctx: KeyedProcessFunction[String, (String, Double), (String, Double, Double)]#Context,
                                     out: Collector[(String, Double, Double)]): Unit = {
          val (id, money) = value
          val timerService: TimerService = ctx.timerService()

          // A small transaction is pending for this key: decide on the alert, then always reset,
          // because only the transaction directly after the small one may trigger an alert.
          Option(smallAmountState.value()).foreach { lastMoney =>
            if (money > LargeAmount) {
              out.collect((id, lastMoney.doubleValue(), money))
            }
            clearPending(timerService)
          }

          if (money < SmallAmount) {
            // Remember the small amount and start the one-minute countdown
            smallAmountState.update(money)
            println("注册定时器")
            val fireAt: Long = timerService.currentProcessingTime() + PendingTimeoutMs
            // Timers are only de-duplicated per key AND timestamp, so the timestamp is stored
            // to be able to delete this timer when the pending state is reset early.
            timerService.registerProcessingTimeTimer(fireAt)
            timerState.update(fireAt)
          }
        }

        // Called when the one-minute timer fires: the pending small transaction has expired.
        override def onTimer(
                              timestamp: Long,
                              ctx: KeyedProcessFunction[String, (String, Double), (String, Double, Double)]#OnTimerContext,
                              out: Collector[(String, Double, Double)]): Unit = {
          println("onTimer")
          smallAmountState.clear()
          timerState.clear()
        }

        // Deletes the outstanding timer (if any) and resets all state of the current key.
        private def clearPending(timerService: TimerService): Unit = {
          Option(timerState.value()).foreach(t => timerService.deleteProcessingTimeTimer(t))
          timerState.clear()
          smallAmountState.clear()
        }
      })

    filterDS.print()
    env.execute()
  }
}
下面附上 Flink 官网欺诈检测教程中的完整程序(FraudDetector),可与上面的实现对照:
package spendreport
import org.apache.flink.api.common.state.{ValueState, ValueStateDescriptor}
import org.apache.flink.api.scala.typeutils.Types
import org.apache.flink.configuration.Configuration
import org.apache.flink.streaming.api.functions.KeyedProcessFunction
import org.apache.flink.util.Collector
import org.apache.flink.walkthrough.common.entity.Alert
import org.apache.flink.walkthrough.common.entity.Transaction
/** Thresholds and timeout used by the `FraudDetector` process function. */
object FraudDetector {

  /** Upper bound (exclusive) for a transaction to count as "small". */
  val SMALL_AMOUNT: Double = 1.0

  /** Lower bound (exclusive) for a transaction to count as "large". */
  val LARGE_AMOUNT: Double = 500.0

  /** One minute expressed in milliseconds. */
  val ONE_MINUTE: Long = 60L * 1000L
}
/**
  * Keyed process function that emits an [[Alert]] when a transaction below
  * `FraudDetector.SMALL_AMOUNT` is directly followed, for the same key, by a transaction above
  * `FraudDetector.LARGE_AMOUNT` before the one-minute processing-time timer fires.
  */
@SerialVersionUID(1L)
class FraudDetector extends KeyedProcessFunction[Long, Transaction, Alert] {

  /** Per-key marker; a non-null value means the previous transaction was small. */
  @transient private var flagState: ValueState[java.lang.Boolean] = _

  /** Per-key timestamp of the timer registered together with the marker. */
  @transient private var timerState: ValueState[java.lang.Long] = _

  /** Obtains both keyed state handles from the runtime context. */
  @throws[Exception]
  override def open(parameters: Configuration): Unit = {
    val runtime = getRuntimeContext
    flagState = runtime.getState(
      new ValueStateDescriptor[java.lang.Boolean]("flag", Types.BOOLEAN))
    timerState = runtime.getState(
      new ValueStateDescriptor[java.lang.Long]("timer-state", Types.LONG))
  }

  /**
    * Handles one transaction of the current key.
    *
    * @param transaction the incoming transaction
    * @param context     gives access to the timer service
    * @param collector   receives the alert, if one is raised
    */
  override def processElement(
      transaction: Transaction,
      context: KeyedProcessFunction[Long, Transaction, Alert]#Context,
      collector: Collector[Alert]): Unit = {
    val amount = transaction.getAmount

    // The flag is only present when the previous transaction of this key was small
    Option(flagState.value).foreach { _ =>
      if (amount > FraudDetector.LARGE_AMOUNT) {
        val alert = new Alert
        alert.setId(transaction.getAccountId)
        collector.collect(alert)
      }
      // Whatever the outcome, the marker and its timer are consumed by this transaction
      cleanUp(context)
    }

    if (amount < FraudDetector.SMALL_AMOUNT) {
      // Arm the detector and schedule its expiry one minute from now
      val expiry = context.timerService.currentProcessingTime + FraudDetector.ONE_MINUTE
      flagState.update(true)
      context.timerService.registerProcessingTimeTimer(expiry)
      timerState.update(expiry)
    }
  }

  /** Timer callback: the marker has expired, so drop it together with the stored timestamp. */
  override def onTimer(
      timestamp: Long,
      ctx: KeyedProcessFunction[Long, Transaction, Alert]#OnTimerContext,
      out: Collector[Alert]): Unit = {
    timerState.clear()
    flagState.clear()
  }

  /** Deletes the registered timer and clears both state entries of the current key. */
  @throws[Exception]
  private def cleanUp(ctx: KeyedProcessFunction[Long, Transaction, Alert]#Context): Unit = {
    ctx.timerService.deleteProcessingTimeTimer(timerState.value)
    timerState.clear()
    flagState.clear()
  }
}
 
                    
                     
                    
                 
                    
                
 
                
            
         
         浙公网安备 33010602011771号
浙公网安备 33010602011771号