Flink窗口 - 1

  •  简介

 

 

 

  • 计数窗口
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.datastream.AllWindowedStream;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.windowing.windows.GlobalWindow;

/**
 * Count-window demo: reads text lines from a local socket, parses each line as an
 * integer and prints the sum of every count-based window over the whole (non-keyed) stream.
 */
public class CountWindowDemo {

    /** Converts one incoming text line into an {@link Integer}. */
    private static final class LineToInteger implements MapFunction<String, Integer> {
        @Override
        public Integer map(String line) throws Exception {
            return Integer.parseInt(line);
        }
    }

    /**
     * Builds and runs the job.
     *
     * @param args unused
     * @throws Exception if the job fails to execute
     */
    public static void main(String[] args) throws Exception {
        Configuration configuration = new Configuration();
        StreamExecutionEnvironment env = StreamExecutionEnvironment.createLocalEnvironmentWithWebUI(configuration);

        // One integer per line is expected on the socket.
        DataStreamSource<String> socketLines = env.socketTextStream("localhost", 8899);
        SingleOutputStreamOperator<Integer> numbers = socketLines.map(new LineToInteger());

        // Tumbling alternative: aggregate once every 5 elements.
        // AllWindowedStream<Integer, GlobalWindow> window = numbers.countWindowAll(5);

        // Sliding: every 5 elements, aggregate the most recent 10 elements.
        AllWindowedStream<Integer, GlobalWindow> window = numbers.countWindowAll(10, 5);

        // Sum the values of each window and print the result.
        SingleOutputStreamOperator<Integer> windowSums = window.sum(0);
        windowSums.print();

        env.execute();
    }
}

 

  • 基于处理时间的窗口
import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.api.java.tuple.Tuple;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.TimeCharacteristic;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.KeyedStream;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.windowing.time.Time;
import org.apache.flink.util.Collector;

/**
 * Processing-time window demo: reads comma-separated words from a local socket and
 * prints per-word counts over a sliding time window.
 */
public class ProcessingTimeWindowDemo {

    /** Splits a line on commas and emits a {@code (word, 1)} pair for every piece. */
    private static final class CommaSeparatedWords implements FlatMapFunction<String, Tuple2<String, Integer>> {
        @Override
        public void flatMap(String line, Collector<Tuple2<String, Integer>> collector) throws Exception {
            String[] words = line.split(",");
            for (int i = 0; i < words.length; i++) {
                collector.collect(Tuple2.of(words[i], 1));
            }
        }
    }

    /**
     * Builds and runs the job.
     *
     * @param args unused
     * @throws Exception if the job fails to execute
     */
    public static void main(String[] args) throws Exception {
        Configuration configuration = new Configuration();
        StreamExecutionEnvironment env = StreamExecutionEnvironment.createLocalEnvironmentWithWebUI(configuration);

        // Example socket input: spark,hadoop,flink
        DataStreamSource<String> socketLines = env.socketTextStream("localhost", 8899);

        // Turn every comma-separated word into a (word, 1) tuple.
        SingleOutputStreamOperator<Tuple2<String, Integer>> wordOnes = socketLines.flatMap(new CommaSeparatedWords());

        // Select the notion of time used by the time windows below.
        env.setStreamTimeCharacteristic(TimeCharacteristic.ProcessingTime);

        // 1. Non-keyed: no grouping, only the total number of words is counted.
        // Tumbling window, evaluated every 5 seconds:
        // wordOnes.timeWindowAll(Time.seconds(5)).sum(1).print();
        // Sliding window, every 5 seconds over the last 10 seconds:
        // wordOnes.timeWindowAll(Time.seconds(10), Time.seconds(5)).sum(1).project(1).print();

        // 2. Keyed: group by the word (tuple field 0) and count each word separately.
        KeyedStream<Tuple2<String, Integer>, Tuple> wordsByKey = wordOnes.keyBy(0);
        // Tumbling window, evaluated every 5 seconds:
        // wordsByKey.timeWindow(Time.seconds(5)).sum(1).print();
        // Sliding window, every 5 seconds over the last 10 seconds:
        SingleOutputStreamOperator<Tuple2<String, Integer>> wordCounts =
                wordsByKey.timeWindow(Time.seconds(10), Time.seconds(5)).sum(1);
        wordCounts.print();

        env.execute();
    }
}

 

  • 基于事件时间的窗口
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.java.tuple.Tuple;
import org.apache.flink.api.java.tuple.Tuple3;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.TimeCharacteristic;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.KeyedStream;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.timestamps.BoundedOutOfOrdernessTimestampExtractor;
import org.apache.flink.streaming.api.windowing.time.Time;

/**
 * Event-time window demo: reads {@code timestamp,word,count} lines from a local socket,
 * takes the event timestamp from the first field and prints per-word sums over a
 * sliding time window.
 */
public class EventTimeWindowDemo {

    /** Reads the event timestamp from the first comma-separated field of the raw line. */
    private static final class FirstFieldTimestampExtractor extends BoundedOutOfOrdernessTimestampExtractor<String> {

        FirstFieldTimestampExtractor() {
            // Zero tolerated out-of-orderness.
            super(Time.seconds(0));
        }

        @Override
        public long extractTimestamp(String element) {
            String timestampField = element.split(",")[0];
            return Long.parseLong(timestampField);
        }
    }

    /** Parses a raw line into a {@code (timestamp text, word, count)} tuple. */
    private static final class LineToTuple3 implements MapFunction<String, Tuple3<String, String, Integer>> {
        @Override
        public Tuple3<String, String, Integer> map(String line) throws Exception {
            String[] fields = line.split(",");
            int count = Integer.parseInt(fields[2]);
            return Tuple3.of(fields[0], fields[1], count);
        }
    }

    /**
     * Builds and runs the job.
     *
     * @param args unused
     * @throws Exception if the job fails to execute
     */
    public static void main(String[] args) throws Exception {
        Configuration configuration = new Configuration();
        StreamExecutionEnvironment env = StreamExecutionEnvironment.createLocalEnvironmentWithWebUI(configuration);
        // Select the notion of time used by the time windows below.
        env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);

        // Example socket input (one record per line): 1000,spark,1   2000,hadoop,2
        DataStreamSource<String> socketLines = env.socketTextStream("localhost", 8899);

        // Event time requires timestamps to be attached to the input stream before windowing.
        SingleOutputStreamOperator<String> timestampedLines =
                socketLines.assignTimestampsAndWatermarks(new FirstFieldTimestampExtractor());
        SingleOutputStreamOperator<Tuple3<String, String, Integer>> records =
                timestampedLines.map(new LineToTuple3());

        // 1. Non-keyed: no grouping, sums the count field over all records.
        // Tumbling window, evaluated every 5 seconds:
        // records.timeWindowAll(Time.seconds(5)).sum(2).project(0,1).print();
        // Sliding window, every 5 seconds over the last 10 seconds:
        // records.timeWindowAll(Time.seconds(10), Time.seconds(5)).sum(2).project(1).print();

        // 2. Keyed: group by the word (tuple field 1) and sum each word separately.
        KeyedStream<Tuple3<String, String, Integer>, Tuple> recordsByWord = records.keyBy(1);
        // Tumbling window, evaluated every 5 seconds:
        // recordsByWord.timeWindow(Time.seconds(5)).sum(2).project(1,2).print();
        // Sliding window, every 5 seconds over the last 10 seconds:
        SingleOutputStreamOperator<Tuple3<String, String, Integer>> wordSums =
                recordsByWord.timeWindow(Time.seconds(10), Time.seconds(5)).sum(2);
        // Keep only the word and its summed count in the printed output.
        wordSums.project(1, 2).print();

        env.execute();
    }
}

 

  • 会话窗口(Session Window)
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.java.tuple.Tuple;
import org.apache.flink.api.java.tuple.Tuple3;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.KeyedStream;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.windowing.assigners.EventTimeSessionWindows;
import org.apache.flink.streaming.api.windowing.assigners.ProcessingTimeSessionWindows;
import org.apache.flink.streaming.api.windowing.time.Time;

/**
 * Session window demo: reads {@code timestamp,word,count} lines from a local socket and
 * prints per-word sums for each processing-time session, where a session closes after a
 * 5 second gap.
 */
public class SessionTimeWindowDemo {

    /** Parses a raw line into a {@code (timestamp text, word, count)} tuple. */
    private static final class LineToTuple3 implements MapFunction<String, Tuple3<String, String, Integer>> {
        @Override
        public Tuple3<String, String, Integer> map(String line) throws Exception {
            String[] fields = line.split(",");
            int count = Integer.parseInt(fields[2]);
            return Tuple3.of(fields[0], fields[1], count);
        }
    }

    /**
     * Builds and runs the job.
     *
     * @param args unused
     * @throws Exception if the job fails to execute
     */
    public static void main(String[] args) throws Exception {
        Configuration configuration = new Configuration();
        StreamExecutionEnvironment env = StreamExecutionEnvironment.createLocalEnvironmentWithWebUI(configuration);

        // Example socket input (one record per line): 1000,spark,1   2000,hadoop,2
        DataStreamSource<String> socketLines = env.socketTextStream("localhost", 8899);

        // Parse each line into a tuple; no event timestamps are assigned in this demo.
        SingleOutputStreamOperator<Tuple3<String, String, Integer>> records = socketLines.map(new LineToTuple3());

        // Keyed: group by the word (tuple field 1).
        KeyedStream<Tuple3<String, String, Integer>, Tuple> recordsByWord = records.keyBy(1);

        // Processing-time session window: sum the counts once no record arrived for 5 seconds.
        SingleOutputStreamOperator<Tuple3<String, String, Integer>> sessionSums =
                recordsByWord.window(ProcessingTimeSessionWindows.withGap(Time.seconds(5))).sum(2);
        sessionSums.print();

        // Event-time variant of the same session window. It requires timestamps to be
        // attached to the stream first via assignTimestampsAndWatermarks.
        // recordsByWord.window(EventTimeSessionWindows.withGap(Time.seconds(5))).sum(2).print();
        env.execute();
    }
}

 

posted @ 2021-07-12 11:36  下雨天嗑瓜子  阅读(70)  评论(0)    收藏  举报