Connecting Java Flink to Kafka for real-time data processing [deployment + code]

Deploying Kafka on a cloud server

Environment: CentOS 7.6, Docker 26.1.4

1. Prepare the compose.yaml file

services:
  zookeeper:
    image: confluentinc/cp-zookeeper:7.3.2
    container_name: zookeeper
    ports:
      - "2181:2181"
    environment:
      ZOOKEEPER_CLIENT_PORT: 2181
      ZOOKEEPER_TICK_TIME: 2000
    networks:
      - kafka-net

  kafka:
    image: confluentinc/cp-kafka:7.3.2
    container_name: kafka
    depends_on:
      - zookeeper
    ports:
      - "9092:9092"
      - "29092:29092"
    environment:
      KAFKA_BROKER_ID: 1  # broker id
      KAFKA_ZOOKEEPER_CONNECT: zookeeper:2181                                         # ZooKeeper ip:port that Kafka connects to
      KAFKA_LISTENER_SECURITY_PROTOCOL_MAP: INTERNAL:PLAINTEXT,EXTERNAL:PLAINTEXT     # Maps each listener name to a protocol; INTERNAL and EXTERNAL are custom names (omittable with a single default listener, required once you define multiple named listeners)
      KAFKA_ADVERTISED_LISTENERS: INTERNAL://kafka:9092,EXTERNAL://152.136.59.229:29092    # Addresses Kafka advertises to clients (container name for internal clients, public IP for external ones)
      KAFKA_LISTENERS: INTERNAL://0.0.0.0:9092,EXTERNAL://0.0.0.0:29092               # Listener names must match ADVERTISED_LISTENERS; Kafka opens these two ports inside the container
      KAFKA_INTER_BROKER_LISTENER_NAME: INTERNAL                                      # Explicitly selects the listener used for broker-to-broker traffic inside the cluster
      KAFKA_AUTO_CREATE_TOPICS_ENABLE: "true"
      KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR: 1
      KAFKA_TRANSACTION_STATE_LOG_REPLICATION_FACTOR: 1
      KAFKA_TRANSACTION_STATE_LOG_MIN_ISR: 1
    networks:
      - kafka-net

networks:
  kafka-net:
    driver: bridge

2. Run the file

docker compose up -d

3. Check that the containers started successfully

docker ps
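
To confirm that the EXTERNAL listener is reachable from outside the host, you can also connect with Kafka's Java AdminClient. A minimal sketch, assuming a kafka-clients dependency on the classpath; the class name BrokerCheck is illustrative, and the address comes from KAFKA_ADVERTISED_LISTENERS above:

import java.util.Properties;
import java.util.concurrent.ExecutionException;
import org.apache.kafka.clients.admin.AdminClient;
import org.apache.kafka.clients.admin.AdminClientConfig;

public class BrokerCheck {
    public static void main(String[] args) throws ExecutionException, InterruptedException {
        Properties props = new Properties();
        // Public IP and EXTERNAL port from the compose file
        props.put(AdminClientConfig.BOOTSTRAP_SERVERS_CONFIG, "152.136.59.229:29092");
        try (AdminClient admin = AdminClient.create(props)) {
            // Times out if the broker or its advertised listener is unreachable
            String clusterId = admin.describeCluster().clusterId().get();
            System.out.println("Connected, cluster id: " + clusterId);
        }
    }
}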

Java Flink program code

Environment: Java 1.8, Flink 1.19.0

1. Create a new project

2. Configure Maven (three settings to change; typically the Maven home path, the user settings file, and the local repository in the IDE)
(screenshot of the Maven settings dialog)

3. Add the dependencies in pom.xml

<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <groupId>com.aiwei</groupId>
    <artifactId>test7</artifactId>
    <version>1.0-SNAPSHOT</version>

    <properties>
        <maven.compiler.source>8</maven.compiler.source>
        <maven.compiler.target>8</maven.compiler.target>
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
        <flink.version>1.19.0</flink.version>
    </properties>

    <dependencies>
        <!-- Flink core dependency -->
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-java</artifactId>
            <version>${flink.version}</version>
        </dependency>
        <!-- Flink streaming dependency -->
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-streaming-java</artifactId>
            <version>${flink.version}</version>
        </dependency>
        <!-- Flink client (command-line interaction) -->
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-clients</artifactId>
            <version>${flink.version}</version>
        </dependency>
        <!-- Connector base dependency -->
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-connector-base</artifactId>
            <version>${flink.version}</version>
        </dependency>
        <!-- Flink-Kafka connector -->
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-connector-kafka</artifactId>
            <version>3.2.0-1.19</version>
        </dependency>
    </dependencies>
</project>
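
With the dependencies in place, the project should also compile from the command line. A quick check, assuming Maven is on the PATH:

mvn -q compile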

4. In the same directory as Main, create a KafkaSourceSinkSample.java class with the following code

package com.aiwei;

import org.apache.flink.api.common.eventtime.WatermarkStrategy;
import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.api.common.serialization.SimpleStringSchema;
import org.apache.flink.connector.kafka.source.KafkaSource;
import org.apache.flink.connector.kafka.source.enumerator.initializer.OffsetsInitializer;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.connector.kafka.sink.KafkaRecordSerializationSchema;
import org.apache.flink.connector.kafka.sink.KafkaSink;
import org.apache.flink.util.Collector;

public class KafkaSourceSinkSample {
    public static void main(String[] args) throws Exception{
        // 创建环境
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(1);      // Sets the default parallelism of every source, operator, and sink in this environment to 1: each operation runs as a single parallel subtask.

        // Configure the Kafka connection properties
        KafkaSource<String> source = KafkaSource.<String>builder()
                .setBootstrapServers("152.136.59.229:29092")
                .setTopics("my-topic")
                .setGroupId("flink-consumer-group")
                .setStartingOffsets(OffsetsInitializer.latest()) // choose latest, earliest, etc. as needed
                // .setStartingOffsets(OffsetsInitializer.earliest())
                .setValueOnlyDeserializer(new SimpleStringSchema()) // set the deserializer
                .build();

        // Read data from Kafka
        DataStreamSource<String> dataStream = env.fromSource(source, WatermarkStrategy.noWatermarks(), "Kafka Source");


        // Option 1: transform, then write the result back to Kafka
        // SingleOutputStreamOperator<String> flatMap = dataStream.flatMap(new FlatMapFunction<String, String>() {
        //     @Override
        //     public void flatMap(String s, Collector<String> collector) throws Exception {
        //         // Split each line into words and emit them one by one
        //         for (String word : s.split(" ")) {
        //             collector.collect(word);
        //         }
        //     }
        // });
        //
        // // Note: the legacy FlinkKafkaProducer was removed in Flink 1.17; KafkaSink is its replacement.
        // KafkaSink<String> sink = KafkaSink.<String>builder()
        //         .setBootstrapServers("152.136.59.229:29092")
        //         .setRecordSerializer(KafkaRecordSerializationSchema.builder()
        //                 .setTopic("target-words")
        //                 .setValueSerializationSchema(new SimpleStringSchema())
        //                 .build())
        //         .build();
        // flatMap.sinkTo(sink);

        // Option 2: print to the console
        dataStream.print();

        env.execute();
    }
}

Java Flink Kafka real-time data streaming

1. Start the Java program

2. Enter the Kafka container

docker exec -it kafka bash

3. List the topics

kafka-topics --bootstrap-server localhost:9092 --list

4. Produce messages to the specified topic

kafka-console-producer --bootstrap-server localhost:9092 --topic my-topic

The Java console will now print each message you type, in real time.
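
Messages can also be produced from Java instead of the console producer. A minimal sketch, assuming a kafka-clients dependency; the class name SimpleProducer is illustrative, and the broker address and topic match the sample above:

import java.util.Properties;
import org.apache.kafka.clients.producer.KafkaProducer;
import org.apache.kafka.clients.producer.ProducerRecord;
import org.apache.kafka.common.serialization.StringSerializer;

public class SimpleProducer {
    public static void main(String[] args) {
        Properties props = new Properties();
        props.put("bootstrap.servers", "152.136.59.229:29092");   // EXTERNAL listener address
        props.put("key.serializer", StringSerializer.class.getName());
        props.put("value.serializer", StringSerializer.class.getName());
        try (KafkaProducer<String, String> producer = new KafkaProducer<>(props)) {
            // Each record should appear immediately in the running Flink job's console
            producer.send(new ProducerRecord<>("my-topic", "hello flink"));
            producer.flush();
        }
    }
}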
