Flink Window Function - AggregateFunction

package window

import org.apache.flink.api.common.functions.AggregateFunction
import org.apache.flink.streaming.api.functions.source.SourceFunction
import org.apache.flink.streaming.api.scala.StreamExecutionEnvironment
import org.apache.flink.streaming.api.windowing.time.Time
import org.apache.flink.api.scala._

/**
 * Streaming word count demonstrating a windowed `AggregateFunction`.
 *
 * A synthetic source emits the same sentence once per second; the sentence is
 * split into words, keyed by word, and counted per 10-second window using an
 * incremental `(word, count)` accumulator. Results are printed to stdout.
 *
 * @author: create by maoxiangyi
 * @version: v1.0
 * @date: 2019/6/4
 */
object AggregateWordCount {
  def main(args: Array[String]): Unit = {
    // Set up the (local) execution environment.
    val env: StreamExecutionEnvironment = StreamExecutionEnvironment.createLocalEnvironment()
    // Set up the data source.
    env.addSource(new SourceFunction[String] {
      // Cleared by cancel(), which may be invoked from a different thread than
      // run(); @volatile makes the update visible to the emitting loop.
      @volatile private var running = true

      override def run(ctx: SourceFunction.SourceContext[String]): Unit = {
        while (running) {
          ctx.collect("hello hadoop hello storm hello spark")
          Thread.sleep(1000)
        }
      }

      // Ask the emitting loop in run() to stop after its current iteration.
      override def cancel(): Unit = {
        running = false
      }
    })
      // Computation logic: split into words, pair each with 1, key by word,
      // then aggregate per window (size 10s, slide 10s).
      .flatMap(_.split(" "))
      .map((_, 1))
      .keyBy(_._1)
      .timeWindow(Time.seconds(10), Time.seconds(10))
      .aggregate(new AggregateFunction[(String, Int), (String, Int), (String, Int)] {
        // Empty accumulator: no word seen yet, count 0.
        override def createAccumulator(): (String, Int) = ("", 0)

        // Fold one (word, n) record into the accumulator, adopting the record's word as key.
        override def add(value: (String, Int), accumulator: (String, Int)): (String, Int) =
          (value._1, accumulator._2 + value._2)

        override def getResult(accumulator: (String, Int)): (String, Int) = accumulator

        // Combine two partial accumulators. If `a` is still the empty accumulator
        // (word == ""), take the word from `b` so the key is not lost.
        override def merge(a: (String, Int), b: (String, Int)): (String, Int) = {
          val word = if (a._1.isEmpty) b._1 else a._1
          (word, a._2 + b._2)
        }
      }).print().setParallelism(1)
    env.execute("word count")
  }
}

posted @ 2019-06-05 09:49 春江师兄阅读(2240) 评论(0) 收藏举报

刷新页面返回顶部

春江师兄

关注大数据实时计算、机器学习，广告领域。

Flink window Function - AggregateFunction

公告