剑道第一仙

导航

chatgpt使用记录

网址:http://chat.yangxiwl.cn/chatbotai/1002?channel=bdtoufangChatztHD3

写一个Java程序,要求将Kafka的数据消费后批量插入到ClickHouse数据库

你可以使用Kafka的Java客户端库和ClickHouse的Java驱动来实现将Kafka数据消费并批量插入到ClickHouse数据库中的功能。以下是一个简单的示例程序:

import org.apache.kafka.clients.consumer.ConsumerConfig;
import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.clients.consumer.ConsumerRecords;
import org.apache.kafka.clients.consumer.KafkaConsumer;
import org.apache.kafka.common.serialization.StringDeserializer;
import ru.yandex.clickhouse.BalancedClickhouseDataSource;
import ru.yandex.clickhouse.ClickHouseConnection;
import ru.yandex.clickhouse.ClickHousePreparedStatement;
import ru.yandex.clickhouse.settings.ClickHouseProperties;

import java.sql.SQLException;
import java.util.*;

public class KafkaToClickHouse {

    /**
     * Consumes string records from a Kafka topic and batch-inserts them into
     * ClickHouse via a JDBC prepared statement.
     *
     * Each record value is expected to be two comma-separated columns:
     * "col1,col2". Malformed records (fewer than two fields) are skipped.
     */
    public static void main(String[] args) {
        // Kafka configuration
        Properties kafkaProps = new Properties();
        kafkaProps.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092");
        kafkaProps.put(ConsumerConfig.GROUP_ID_CONFIG, "clickhouse-consumer");
        kafkaProps.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class.getName());
        kafkaProps.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class.getName());
        kafkaProps.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest");

        // ClickHouse configuration
        ClickHouseProperties clickHouseProps = new ClickHouseProperties();
        clickHouseProps.setHost("localhost");
        clickHouseProps.setPort(8123);
        clickHouseProps.setDatabase("your_database_name");
        clickHouseProps.setUser("your_username");
        clickHouseProps.setPassword("your_password");

        // Kafka consumer
        KafkaConsumer<String, String> consumer = new KafkaConsumer<>(kafkaProps);
        consumer.subscribe(Collections.singletonList("your_topic_name"));

        // ClickHouse connection pool
        BalancedClickhouseDataSource dataSource = new BalancedClickhouseDataSource("jdbc:clickhouse://", clickHouseProps);

        // Consume and insert data
        try (ClickHouseConnection connection = dataSource.getConnection();
             ClickHousePreparedStatement stmt = connection.prepareStatement("INSERT INTO your_table_name (col1, col2) VALUES (?, ?)")) {
            while (true) {
                // FIX: Duration was referenced without an import; fully qualified here
                // so this snippet compiles without touching the import block.
                ConsumerRecords<String, String> records = consumer.poll(java.time.Duration.ofMillis(100));
                if (records.isEmpty()) {
                    // FIX: don't fire an empty executeBatch() on every idle poll.
                    continue;
                }
                for (ConsumerRecord<String, String> record : records) {
                    String[] data = record.value().split(","); // expected format: "col1,col2"
                    if (data.length < 2) {
                        // FIX: skip malformed records instead of throwing
                        // ArrayIndexOutOfBoundsException and killing the loop.
                        continue;
                    }
                    stmt.setString(1, data[0]);
                    stmt.setString(2, data[1]);
                    stmt.addBatch();
                }
                // Per JDBC, the statement's batch is reset to empty after executeBatch() returns.
                stmt.executeBatch();
            }
        } catch (SQLException e) {
            e.printStackTrace();
        } finally {
            consumer.close();
            dataSource.close();
        }
    }
}

 

下面是一个示例程序,它将Kafka的数据按照每100条记录进行批量插入到ClickHouse数据库,并在每个批次结束后提交事务

import org.apache.kafka.clients.consumer.ConsumerConfig;
import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.clients.consumer.ConsumerRecords;
import org.apache.kafka.clients.consumer.KafkaConsumer;
import org.apache.kafka.common.serialization.StringDeserializer;
import ru.yandex.clickhouse.BalancedClickhouseDataSource;
import ru.yandex.clickhouse.ClickHouseConnection;
import ru.yandex.clickhouse.ClickHousePreparedStatement;
import ru.yandex.clickhouse.settings.ClickHouseProperties;

import java.sql.SQLException;
import java.time.Duration;
import java.util.ArrayList;
import java.util.List;
import java.util.Properties;

public class KafkaToClickHouse {

    /** Rows per ClickHouse insert; also caps records returned by a single poll. */
    private static final int BATCH_SIZE = 100;

    /**
     * Consumes Kafka records ("col1,col2" comma-separated values) and inserts
     * them into ClickHouse in batches of {@link #BATCH_SIZE}. A smaller,
     * lingering batch is flushed once the topic goes idle.
     */
    public static void main(String[] args) {
        // Kafka configuration
        Properties kafkaProps = new Properties();
        kafkaProps.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092");
        kafkaProps.put(ConsumerConfig.GROUP_ID_CONFIG, "clickhouse-consumer");
        kafkaProps.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class.getName());
        kafkaProps.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class.getName());
        kafkaProps.put(ConsumerConfig.MAX_POLL_RECORDS_CONFIG, BATCH_SIZE); // at most 100 records per poll

        // ClickHouse configuration
        ClickHouseProperties clickHouseProps = new ClickHouseProperties();
        clickHouseProps.setHost("localhost");
        clickHouseProps.setPort(8123);
        clickHouseProps.setDatabase("your_database_name");
        clickHouseProps.setUser("your_username");
        clickHouseProps.setPassword("your_password");

        // Kafka consumer
        KafkaConsumer<String, String> consumer = new KafkaConsumer<>(kafkaProps);
        // FIX: Collections is not in this snippet's imports; fully qualified here.
        consumer.subscribe(java.util.Collections.singletonList("your_topic_name"));

        // ClickHouse connection pool
        BalancedClickhouseDataSource dataSource = new BalancedClickhouseDataSource("jdbc:clickhouse://", clickHouseProps);

        // Consume and insert data
        try (ClickHouseConnection connection = dataSource.getConnection()) {
            List<String[]> batch = new ArrayList<>(BATCH_SIZE);
            while (true) {
                ConsumerRecords<String, String> records = consumer.poll(Duration.ofMillis(100));
                for (ConsumerRecord<String, String> record : records) {
                    String[] data = record.value().split(","); // expected format: "col1,col2"
                    if (data.length < 2) {
                        continue; // skip malformed records instead of throwing
                    }
                    batch.add(data);
                }
                // FIX: also flush a partial batch once the topic goes idle, so the
                // tail of the stream is not held in memory indefinitely.
                if (batch.size() >= BATCH_SIZE || (records.isEmpty() && !batch.isEmpty())) {
                    insertBatch(connection, batch);
                    batch.clear();
                }
            }
        } catch (SQLException e) {
            e.printStackTrace();
        } finally {
            consumer.close();
            dataSource.close();
        }
    }

    /**
     * Inserts the given rows (each a two-element {col1, col2} array) in one batch.
     *
     * FIX: uses a parameterized statement instead of concatenating values into
     * the SQL string — the original broke (and was injectable) as soon as a
     * value contained a single quote.
     *
     * @throws SQLException if the insert fails
     */
    private static void insertBatch(ClickHouseConnection connection, List<String[]> batch) throws SQLException {
        try (ClickHousePreparedStatement stmt = connection.prepareStatement(
                "INSERT INTO your_table_name (col1, col2) VALUES (?, ?)")) {
            for (String[] row : batch) {
                stmt.setString(1, row[0]);
                stmt.setString(2, row[1]);
                stmt.addBatch();
            }
            stmt.executeBatch();
        }
    }
}

 如何实现批量提交的同时避免重复插入

import org.apache.kafka.clients.consumer.ConsumerConfig;
import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.clients.consumer.ConsumerRecords;
import org.apache.kafka.clients.consumer.KafkaConsumer;
import org.apache.kafka.common.serialization.StringDeserializer;
import ru.yandex.clickhouse.BalancedClickhouseDataSource;
import ru.yandex.clickhouse.ClickHouseConnection;
import ru.yandex.clickhouse.ClickHousePreparedStatement;
import ru.yandex.clickhouse.settings.ClickHouseProperties;

import java.sql.SQLException;
import java.time.Duration;
import java.util.*;

public class KafkaToClickHouse {

    /** Rows per ClickHouse insert; also caps records returned by a single poll. */
    private static final int BATCH_SIZE = 100;

    /**
     * Consumes Kafka records ("col1,col2" comma-separated values) and inserts
     * them into ClickHouse in batches, committing Kafka offsets only AFTER a
     * batch has been inserted successfully.
     *
     * Duplicate avoidance: with auto-commit disabled and commitSync() placed
     * after the insert, a crash causes re-delivery rather than data loss
     * (at-least-once). For the rare redelivered batch, rely on ClickHouse's
     * insert block deduplication (replicated/*MergeTree with
     * insert_deduplicate) or a ReplacingMergeTree table — NOT on parsing
     * driver error messages, as the original "Code: 409" check did.
     */
    public static void main(String[] args) {
        // Kafka configuration
        Properties kafkaProps = new Properties();
        kafkaProps.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092");
        kafkaProps.put(ConsumerConfig.GROUP_ID_CONFIG, "clickhouse-consumer");
        kafkaProps.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class.getName());
        kafkaProps.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class.getName());
        kafkaProps.put(ConsumerConfig.MAX_POLL_RECORDS_CONFIG, BATCH_SIZE); // at most 100 records per poll
        // FIX: disable auto-commit so offsets are committed only after the
        // insert succeeds; auto-commit could acknowledge records that were
        // never written, or re-deliver records that were (duplicates).
        kafkaProps.put(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, "false");

        // ClickHouse configuration
        ClickHouseProperties clickHouseProps = new ClickHouseProperties();
        clickHouseProps.setHost("localhost");
        clickHouseProps.setPort(8123);
        clickHouseProps.setDatabase("your_database_name");
        clickHouseProps.setUser("your_username");
        clickHouseProps.setPassword("your_password");

        // Kafka consumer
        KafkaConsumer<String, String> consumer = new KafkaConsumer<>(kafkaProps);
        consumer.subscribe(Collections.singletonList("your_topic_name"));

        // ClickHouse connection pool
        BalancedClickhouseDataSource dataSource = new BalancedClickhouseDataSource("jdbc:clickhouse://", clickHouseProps);

        // Consume and insert data
        try (ClickHouseConnection connection = dataSource.getConnection()) {
            List<String[]> batch = new ArrayList<>(BATCH_SIZE);
            while (true) {
                ConsumerRecords<String, String> records = consumer.poll(Duration.ofMillis(100));
                if (records.isEmpty()) {
                    continue;
                }
                for (ConsumerRecord<String, String> record : records) {
                    String[] data = record.value().split(","); // expected format: "col1,col2"
                    if (data.length < 2) {
                        continue; // skip malformed records instead of throwing
                    }
                    batch.add(data);
                }
                if (!batch.isEmpty()) {
                    insertBatch(connection, batch);
                    batch.clear();
                }
                // Commit offsets only once the data is safely in ClickHouse.
                consumer.commitSync();
            }
        } catch (SQLException e) {
            e.printStackTrace();
        } finally {
            consumer.close();
            dataSource.close();
        }
    }

    /**
     * Inserts the given rows (each a two-element {col1, col2} array) in one batch.
     *
     * FIX: parameterized statement instead of string-concatenated SQL (which
     * was injectable and broke on quotes). The bogus "Code: 409" catch that
     * silently swallowed errors has been removed — failures propagate so the
     * offsets for this batch are NOT committed and the batch is redelivered.
     *
     * @throws SQLException if the insert fails
     */
    private static void insertBatch(ClickHouseConnection connection, List<String[]> batch) throws SQLException {
        try (ClickHousePreparedStatement stmt = connection.prepareStatement(
                "INSERT INTO your_table_name (col1, col2) VALUES (?, ?)")) {
            for (String[] row : batch) {
                stmt.setString(1, row[0]);
                stmt.setString(2, row[1]);
                stmt.addBatch();
            }
            stmt.executeBatch();
        }
    }
}

 

posted on 2023-10-10 22:45  剑道第一仙  阅读(12)  评论(0编辑  收藏  举报