package com.xiaohu.source;
import org.apache.flink.api.common.eventtime.WatermarkStrategy;
import org.apache.flink.api.common.serialization.SimpleStringSchema;
import org.apache.flink.connector.kafka.source.KafkaSource;
import org.apache.flink.connector.kafka.source.enumerator.initializer.OffsetsInitializer;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
/*
从kafka中读取数据
kafka消费者的参数,
auto.offset.reset
earliest: 如果设置了偏移量,就从偏移量开始消费,如果没有,就从最早的一个开始消费
latest: 如果设置了偏移量,就从偏移量开始消费,如果没有,从最新的开始消费
flink中kafka的参数设置意思:
setStartingOffsets(OffsetsInitializer)
OffsetsInitializer接口中有静态的方法:
【默认的】earliest():不管有没有设置偏移量,它都会将偏移量初始化为每个分区最早可用的偏移量。强制的
latest():不管有没有设置偏移量,它都会将偏移量初始化为每个分区的最新偏移量。强制的
*/
/**
 * Demo job: consumes string records from a Kafka topic with Flink's
 * {@link KafkaSource} and prints every record.
 */
public class FromKafkaSourceDemo {

    /**
     * Creates the execution environment, attaches the Kafka source without
     * watermarks, prints the resulting stream and submits the job.
     *
     * @param args command-line arguments; not read by this demo
     * @throws Exception propagated from building or executing the job
     */
    public static void main(String[] args) throws Exception {
        KafkaSource<String> source = buildKafkaSource();

        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        // No watermarks are generated for this stream.
        WatermarkStrategy<String> noWatermarks = WatermarkStrategy.noWatermarks();
        DataStreamSource<String> records = env.fromSource(source, noWatermarks, "kafkaSource");

        records.print();

        env.execute("从kafka读取数据");
    }

    /**
     * Builds the Kafka source used by this demo: topic {@code wc}, consumer
     * group {@code xiaohu}, record values deserialized as plain strings.
     *
     * @return the configured Kafka source
     */
    private static KafkaSource<String> buildKafkaSource() {
        return KafkaSource.<String>builder()
                // Topic to consume.
                .setTopics("wc")
                // Consumer group name.
                .setGroupId("xiaohu")
                // Addresses and ports of all Kafka broker nodes.
                .setBootstrapServers("master:9092,node1:9092,node2:9092")
                // Consumption mode: start from the latest offset of each partition.
                .setStartingOffsets(OffsetsInitializer.latest())
                // Deserialize only the record value, as a readable string.
                .setValueOnlyDeserializer(new SimpleStringSchema())
                .build();
    }
}