ZooKeeper & Kafka Deployment and Configuration Guide
Linux version
1. Setting up ZooKeeper
Reference: https://www.cnblogs.com/fushiyi/articles/18141514
2. Setting up Kafka
Reference: https://www.cnblogs.com/fushiyi/articles/18141514
Standalone mode
cd kafka_2.13-3.4.0/bin
./kafka-topics.sh --create --bootstrap-server localhost:9092 --replication-factor 1 --partitions 1 --topic real_time
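To verify the topic was created, the check can also be done from Java with the Kafka AdminClient. A minimal sketch, assuming the kafka-clients dependency is on the classpath (the class name TopicCheck is made up for illustration; broker address and topic name are copied from the command above):
import java.util.Properties;
import java.util.Set;
import org.apache.kafka.clients.admin.AdminClient;
import org.apache.kafka.clients.admin.AdminClientConfig;

public class TopicCheck {
    public static void main(String[] args) throws Exception {
        Properties props = new Properties();
        // Same broker address as the kafka-topics.sh command above
        props.put(AdminClientConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092");
        try (AdminClient admin = AdminClient.create(props)) {
            // List all topic names and check that real_time was created
            Set<String> topics = admin.listTopics().names().get();
            System.out.println("real_time exists: " + topics.contains("real_time"));
        }
    }
}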
Windows version
-- Start ZooKeeper
bin\windows\zookeeper-server-start.bat config\zookeeper.properties
-- Start Kafka
bin\windows\kafka-server-start.bat config\server.properties
-- Create a topic
bin\windows\kafka-topics.bat --create --bootstrap-server localhost:9092 --replication-factor 1 --partitions 1 --topic testfubo1
-- Create a console producer
bin\windows\kafka-console-producer.bat --bootstrap-server localhost:9092 --topic testfubo1
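Messages can also be sent from Java instead of the console producer. A minimal sketch, assuming the kafka-clients dependency is available (broker and topic are taken from the commands above; the class name TestProducer is made up):
import java.util.Properties;
import org.apache.kafka.clients.producer.KafkaProducer;
import org.apache.kafka.clients.producer.ProducerConfig;
import org.apache.kafka.clients.producer.ProducerRecord;
import org.apache.kafka.common.serialization.StringSerializer;

public class TestProducer {
    public static void main(String[] args) {
        Properties props = new Properties();
        props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092");
        props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, StringSerializer.class.getName());
        props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, StringSerializer.class.getName());
        try (KafkaProducer<String, String> producer = new KafkaProducer<>(props)) {
            // Send a test line; the Flink job below reads it from testfubo1
            producer.send(new ProducerRecord<>("testfubo1", "hello kafka hello flink"));
            producer.flush();
        }
    }
}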
-------------Flink-to-Kafka connection config---------------
// Required imports:
//   java.util.Properties
//   org.apache.kafka.clients.consumer.ConsumerConfig
//   org.apache.kafka.common.serialization.StringDeserializer
//   org.apache.flink.api.common.serialization.SimpleStringSchema
//   org.apache.flink.streaming.api.datastream.DataStream
//   org.apache.flink.streaming.api.environment.StreamExecutionEnvironment
//   org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

// Kafka connection settings
Properties properties = new Properties();
properties.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092");
// Configure key/value deserialization
properties.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class.getName());
properties.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class.getName());
// Configure the consumer group
properties.put(ConsumerConfig.GROUP_ID_CONFIG, "testfubo");
// Build a data stream from the Kafka topic
DataStream<String> stream = env.addSource(new FlinkKafkaConsumer<String>(
        "testfubo1",
        new SimpleStringSchema(),
        properties
));
stream.print("flink");
env.execute();
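Note: in recent Flink releases (1.14 and later) FlinkKafkaConsumer is deprecated in favor of KafkaSource. A sketch of the equivalent setup with the newer connector, assuming a matching flink-connector-kafka dependency; topic, group id, and broker mirror the snippet above:
// import org.apache.flink.api.common.eventtime.WatermarkStrategy;
// import org.apache.flink.connector.kafka.source.KafkaSource;
// import org.apache.flink.connector.kafka.source.enumerator.initializer.OffsetsInitializer;
KafkaSource<String> source = KafkaSource.<String>builder()
        .setBootstrapServers("localhost:9092")
        .setTopics("testfubo1")
        .setGroupId("testfubo")
        // Start reading from the latest offsets (an assumption; adjust as needed)
        .setStartingOffsets(OffsetsInitializer.latest())
        .setValueOnlyDeserializer(new SimpleStringSchema())
        .build();
DataStream<String> stream2 = env.fromSource(source, WatermarkStrategy.noWatermarks(), "kafka-source");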
-------------Flink data processing reference---------------
// (continues from the Kafka `stream` and `env` defined above)
// Additional imports:
//   java.util.Arrays
//   org.apache.flink.api.common.typeinfo.Types
//   org.apache.flink.api.java.tuple.Tuple2
//   org.apache.flink.streaming.api.datastream.KeyedStream
//   org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator
//   org.apache.flink.util.Collector
// Split each input line into words
SingleOutputStreamOperator<Tuple2<String, Long>> wordAndOne = stream
        .flatMap((String line, Collector<String> words) -> {
            Arrays.stream(line.split(" ")).forEach(words::collect);
        })
        // Declare the return type of the split (lambdas erase generic type info)
        .returns(Types.STRING)
        // Map each word (duplicates included) to a tuple of (word, initial count 1), e.g. (hello, 1)
        .map(word -> Tuple2.of(word, 1L))
        // Declare the tuple's generic types
        .returns(Types.TUPLE(Types.STRING, Types.LONG));
// Group by the word in field 0
KeyedStream<Tuple2<String, Long>, String> wordAndOneKS = wordAndOne
        .keyBy(t -> t.f0);
// Sum the counts in field 1
SingleOutputStreamOperator<Tuple2<String, Long>> result = wordAndOneKS
        .sum(1);
result.print("test");
env.execute();
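The .returns(...) hints above are needed because Java lambdas erase generic type information. One alternative is a named FlatMapFunction class, from which Flink can extract the types directly; a sketch under the same setup:
// import org.apache.flink.api.common.functions.FlatMapFunction;
// import org.apache.flink.api.java.tuple.Tuple2;
// import org.apache.flink.util.Collector;

// A tokenizer implemented as a class keeps full generic type info,
// so no .returns(...) hints are required.
public static class Tokenizer implements FlatMapFunction<String, Tuple2<String, Long>> {
    @Override
    public void flatMap(String line, Collector<Tuple2<String, Long>> out) {
        for (String word : line.split(" ")) {
            out.collect(Tuple2.of(word, 1L));
        }
    }
}

// Usage: stream.flatMap(new Tokenizer()).keyBy(t -> t.f0).sum(1).print("test");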