package com.xiaohu.env;
import org.apache.flink.api.common.RuntimeExecutionMode;
import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.api.java.functions.KeySelector;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.configuration.RestOptions;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.environment.LocalStreamEnvironment;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.util.Collector;
/*
Flink DataStream API demo.
*/
/**
 * Demonstrates how to obtain and configure a Flink {@link StreamExecutionEnvironment} and run a
 * streaming word count over text lines read from a socket.
 */
public class EnvDemo {

    /**
     * Builds the socket word-count pipeline and submits it as a single job.
     *
     * @param args command-line arguments (unused)
     * @throws Exception propagated from {@code env.execute(...)}
     */
    public static void main(String[] args) throws Exception {
        // Ways to create an execution environment:
        //   StreamExecutionEnvironment
        //       .createLocalEnvironment()                           // local environment
        //       .createRemoteEnvironment("master", 8081, "/xx/xx")  // remote environment
        //       .getExecutionEnvironment()                          // default configuration; Flink decides
        //                                                           // internally between local and remote

        // Flink configuration object passed to the environment.
        Configuration conf = new Configuration();
        conf.set(RestOptions.BIND_PORT, "8082"); // change the web UI port (the default is 8081)
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(conf);

        // Choosing between stream and batch execution. The DataSet API is deprecated; the same
        // DataStream code now serves both modes and the mode is selected by configuration:
        //   env.setRuntimeMode(RuntimeExecutionMode.BATCH);      // batch
        //   env.setRuntimeMode(RuntimeExecutionMode.STREAMING);  // streaming (the default)
        // Hard-coding the mode is usually avoided because it is inflexible; it is normally given
        // at submission time instead:
        //   flink run -Dexecution.runtime-mode=BATCH|STREAMING ...

        DataStreamSource<String> socketDS = env.socketTextStream("master", 7777);

        socketDS.flatMap(new FlatMapFunction<String, Tuple2<String, Long>>() {
            @Override
            public void flatMap(String line, Collector<Tuple2<String, Long>> out) throws Exception {
                // Split on runs of whitespace and drop empty tokens, so that blank lines and
                // leading or repeated spaces do not get counted as an empty-string "word".
                for (String word : line.split("\\s+")) {
                    if (!word.isEmpty()) {
                        out.collect(Tuple2.of(word, 1L));
                    }
                }
            }
        }).keyBy(new KeySelector<Tuple2<String, Long>, String>() {
            @Override
            public String getKey(Tuple2<String, Long> wordAndCount) throws Exception {
                // Group by the word itself (tuple field 0).
                return wordAndCount.f0;
            }
        }).sum(1).print(); // running sum of tuple field 1 (the count) per word

        // Each call to execute() or executeAsync() triggers one job.
        // Execution is lazy (deferred): the pipeline defined above only runs once it is submitted here.
        env.execute("DataStreamApi测试无界流读取socket数据");

        // A program may submit more than once:
        //   - env.execute() blocks, so jobs run in code order and each one waits for the
        //     previous job to finish.
        //   - Newer versions provide env.executeAsync(), which submits without blocking; to run
        //     several jobs that way, submit all of them (including the one above) with it.
        //     This is rarely needed in practice.
        //   - Every executeAsync() call produces one job, and each job gets its own JobMaster
        //     inside the JobManager.
    }
}