
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.datastream.AllWindowedStream;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.windowing.windows.GlobalWindow;
/**
 * Demo of count-based (non-time) windows over an unkeyed stream.
 *
 * <p>Reads integers (one per line) from a local socket on port 8899,
 * applies a sliding count window, sums the values and prints the result.
 * Start a feeder first, e.g. {@code nc -lk 8899}.
 */
public class CountWindowDemo {
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env =
                StreamExecutionEnvironment.createLocalEnvironmentWithWebUI(new Configuration());
        DataStreamSource<String> lines = env.socketTextStream("localhost", 8899);
        SingleOutputStreamOperator<Integer> numStream = lines.map(new MapFunction<String, Integer>() {
            @Override
            public Integer map(String s) throws Exception {
                // trim() tolerates trailing '\r' (Windows clients send CRLF; the
                // socket source strips only '\n') and stray whitespace, which would
                // otherwise throw NumberFormatException and kill the whole job.
                return Integer.parseInt(s.trim());
            }
        });
        // Tumbling count window: emit a sum once every 5 elements.
        // AllWindowedStream<Integer, GlobalWindow> countWindowAll = numStream.countWindowAll(5);
        // Sliding count window: every 5 elements, sum the most recent 10.
        AllWindowedStream<Integer, GlobalWindow> countWindowAll = numStream.countWindowAll(10, 5);
        // Sum field position 0 of the record and print to stdout.
        countWindowAll.sum(0).print();
        env.execute();
    }
}
import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.api.java.tuple.Tuple;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.TimeCharacteristic;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.KeyedStream;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.windowing.time.Time;
import org.apache.flink.util.Collector;
/**
 * Demo of processing-time windows (wall-clock driven).
 *
 * <p>Reads comma-separated words from a local socket on port 8899
 * (e.g. {@code spark,hadoop,flink}), turns each word into a
 * {@code (word, 1)} pair, and counts occurrences per word over a
 * sliding processing-time window.
 */
public class ProcessingTimeWindowDemo {
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env =
                StreamExecutionEnvironment.createLocalEnvironmentWithWebUI(new Configuration());
        // Socket input, e.g.: spark,hadoop,flink
        DataStreamSource<String> source = env.socketTextStream("localhost", 8899);
        // Split each line on commas and emit one (word, 1) tuple per token.
        SingleOutputStreamOperator<Tuple2<String, Integer>> pairs =
                source.flatMap(new FlatMapFunction<String, Tuple2<String, Integer>>() {
                    @Override
                    public void flatMap(String line, Collector<Tuple2<String, Integer>> collector) throws Exception {
                        for (String word : line.split(",")) {
                            collector.collect(Tuple2.of(word, 1));
                        }
                    }
                });
        // Windows below are assigned by processing time (the machine's clock).
        env.setStreamTimeCharacteristic(TimeCharacteristic.ProcessingTime);
        // 1. Non-keyed: count occurrences over the whole stream.
        //    Tumbling window, one result every 5 seconds:
        // pairs.timeWindowAll(Time.seconds(5)).sum(1).print();
        //    Sliding window, every 5 seconds over the last 10 seconds:
        // pairs.timeWindowAll(Time.seconds(10), Time.seconds(5)).sum(1).project(1).print();
        // 2. Keyed: count occurrences per word (key = tuple field 0).
        KeyedStream<Tuple2<String, Integer>, Tuple> byWord = pairs.keyBy(0);
        //    Tumbling window, one result every 5 seconds:
        // pairs.timeWindow(Time.seconds(5)).sum(1).print();
        //    Sliding window, every 5 seconds over the last 10 seconds:
        byWord.timeWindow(Time.seconds(10), Time.seconds(5)).sum(1).print();
        env.execute();
    }
}
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.java.tuple.Tuple;
import org.apache.flink.api.java.tuple.Tuple3;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.TimeCharacteristic;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.KeyedStream;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.timestamps.BoundedOutOfOrdernessTimestampExtractor;
import org.apache.flink.streaming.api.windowing.time.Time;
/**
 * Demo of event-time windows driven by timestamps embedded in the data.
 *
 * <p>Reads lines of the form {@code <epochMillis>,<word>,<count>}
 * (e.g. {@code 1000,spark,1}) from a local socket on port 8899, assigns
 * event-time timestamps and watermarks from the first field, and sums
 * counts per word over a sliding event-time window.
 */
public class EventTimeWindowDemo {
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env =
                StreamExecutionEnvironment.createLocalEnvironmentWithWebUI(new Configuration());
        // Windows below are assigned by event time taken from the records.
        env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
        // Socket input, e.g.: 1000,spark,1  2000,hadoop,2
        DataStreamSource<String> source = env.socketTextStream("localhost", 8899);
        // Event time requires binding a timestamp/watermark extractor to the stream;
        // out-of-orderness bound is 0 seconds, so watermarks track the max timestamp.
        SingleOutputStreamOperator<Tuple3<String, String, Integer>> records = source
                .assignTimestampsAndWatermarks(new BoundedOutOfOrdernessTimestampExtractor<String>(Time.seconds(0)) {
                    @Override
                    public long extractTimestamp(String element) {
                        // First CSV field carries the event timestamp in millis.
                        return Long.parseLong(element.split(",")[0]);
                    }
                })
                .map(new MapFunction<String, Tuple3<String, String, Integer>>() {
                    @Override
                    public Tuple3<String, String, Integer> map(String line) throws Exception {
                        String[] fields = line.split(",");
                        return Tuple3.of(fields[0], fields[1], Integer.parseInt(fields[2]));
                    }
                });
        // 1. Non-keyed: count occurrences over the whole stream.
        //    Tumbling window, one result every 5 seconds:
        // records.timeWindowAll(Time.seconds(5)).sum(2).project(0,1).print();
        //    Sliding window, every 5 seconds over the last 10 seconds:
        // records.timeWindowAll(Time.seconds(10), Time.seconds(5)).sum(2).project(1).print();
        // 2. Keyed: count occurrences per word (key = tuple field 1).
        KeyedStream<Tuple3<String, String, Integer>, Tuple> byWord = records.keyBy(1);
        //    Tumbling window, one result every 5 seconds:
        // records.timeWindow(Time.seconds(5)).sum(2).project(1,2).print();
        //    Sliding window, every 5 seconds over the last 10 seconds:
        byWord.timeWindow(Time.seconds(10), Time.seconds(5)).sum(2).project(1,2).print();
        env.execute();
    }
}
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.java.tuple.Tuple;
import org.apache.flink.api.java.tuple.Tuple3;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.KeyedStream;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.windowing.assigners.EventTimeSessionWindows;
import org.apache.flink.streaming.api.windowing.assigners.ProcessingTimeSessionWindows;
import org.apache.flink.streaming.api.windowing.time.Time;
/**
 * Demo of session windows, which close after a gap of inactivity.
 *
 * <p>Reads lines of the form {@code <epochMillis>,<word>,<count>}
 * (e.g. {@code 1000,spark,1}) from a local socket on port 8899 and sums
 * counts per word; a window fires once no new element arrives for 5 seconds.
 */
public class SessionTimeWindowDemo {
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env =
                StreamExecutionEnvironment.createLocalEnvironmentWithWebUI(new Configuration());
        // Socket input, e.g.: 1000,spark,1  2000,hadoop,2
        DataStreamSource<String> source = env.socketTextStream("localhost", 8899);
        // Parse each CSV line into (timestamp, word, count).
        SingleOutputStreamOperator<Tuple3<String, String, Integer>> records =
                source.map(new MapFunction<String, Tuple3<String, String, Integer>>() {
                    @Override
                    public Tuple3<String, String, Integer> map(String line) throws Exception {
                        String[] fields = line.split(",");
                        return Tuple3.of(fields[0], fields[1], Integer.parseInt(fields[2]));
                    }
                });
        // Keyed: count occurrences per word (key = tuple field 1).
        KeyedStream<Tuple3<String, String, Integer>, Tuple> byWord = records.keyBy(1);
        // Processing-time session window: fires after 5s without new input for a key.
        byWord.window(ProcessingTimeSessionWindows.withGap(Time.seconds(5))).sum(2).print();
        // Event-time session window (requires assignTimestampsAndWatermarks on the stream):
        // byWord.window(EventTimeSessionWindows.withGap(Time.seconds(5))).sum(2).print();
        env.execute();
    }
}