Flink window Function - ProcessWindowFunction
package window
import org.apache.flink.api.common.functions.AggregateFunction
import org.apache.flink.api.scala._
import org.apache.flink.streaming.api.functions.source.SourceFunction
import org.apache.flink.streaming.api.scala.StreamExecutionEnvironment
import org.apache.flink.streaming.api.scala.function.ProcessWindowFunction
import org.apache.flink.streaming.api.windowing.time.Time
import org.apache.flink.streaming.api.windowing.windows.TimeWindow
import org.apache.flink.util.Collector
/**
 * Word-count example built on Flink's `ProcessWindowFunction`.
 *
 * A local streaming job reads one fixed sentence per second from an inline
 * source, splits it into words, keys the stream by word and, for every
 * 10-second window, emits a `(word, count)` pair that is printed.
 *
 * Created 2019/6/4.
 *
 * @author maoxiangyi
 * @version v1.0
 */
object ProcessWordCount {

  /**
   * Builds the pipeline and runs it in a local execution environment.
   *
   * @param args command-line arguments (not used)
   */
  def main(args: Array[String]): Unit = {
    // Set up the execution environment.
    val env: StreamExecutionEnvironment = StreamExecutionEnvironment.createLocalEnvironment()

    // Data source: emits the same sentence once per second until cancelled.
    env.addSource(new SourceFunction[String] {
      // Cleared by cancel() and polled by the run() loop. Volatile because
      // cancel() may be invoked from a different thread than run().
      @volatile private var running = true

      override def run(ctx: SourceFunction.SourceContext[String]): Unit = {
        while (running) {
          ctx.collect("hello hadoop hello storm hello spark")
          Thread.sleep(1000)
        }
      }

      // Signal the emit loop to stop so the source can actually terminate.
      override def cancel(): Unit = {
        running = false
      }
    })
      // Processing logic: split into words, pair each word with 1, group by word.
      .flatMap(_.split(" "))
      .map((_, 1))
      .keyBy(_._1)
      // Window size equals the slide interval (10 s each), so windows do not overlap.
      .timeWindow(Time.seconds(10), Time.seconds(10))
      .process(new ProcessWindowFunction[(String, Int), (String, Int), String, TimeWindow] {
        // Called once per key and window with all elements of that window;
        // emits the key together with the sum of the per-element counts.
        override def process(key: String, context: Context, elements: Iterable[(String, Int)], out: Collector[(String, Int)]): Unit = {
          val total = elements.iterator.map(_._2).sum
          // Pass an explicit tuple instead of relying on argument auto-tupling.
          out.collect((key, total))
        }
      })
      .print().setParallelism(1)

    env.execute("word count")
  }
}
个人网站:shuoyizui.com
公众号:写个框架玩
近期会在公众号发布一系列文章,主要是想完成一个简化的MapReduce框架的编写,实现MapReduce编程模型、任务远程提交、任务分配、任务执行等功能,涉及动态代理、反射、网络通信、序列化、消息队列、Netty、自定义类加载器、多线程、Shell等技术点。