狂自私

导航

向 Kafka 写入数据

import ast
import json
import logging
import os
from typing import Any, Dict, List, Optional, Union

from kafka import KafkaProducer
from kafka.errors import KafkaError, NoBrokersAvailable

# Configure logging at import time so connection and send problems are easy to trace.
_LOG_FORMAT = "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
logging.basicConfig(format=_LOG_FORMAT, level=logging.INFO)
logger = logging.getLogger("kafka_producer")

class KafkaDataSender:
    """Send JSON-serialized messages to Kafka topics through a ``KafkaProducer``.

    Values are encoded with ``json.dumps(..., ensure_ascii=False)`` and UTF-8;
    keys are UTF-8 encoded strings. Every send blocks until the broker
    acknowledges the record or ``SEND_TIMEOUT_SECONDS`` elapses.

    The instance can be used as a context manager; leaving the ``with`` block
    calls :meth:`close`.
    """

    # Seconds to wait for the acknowledgement of each individual record.
    SEND_TIMEOUT_SECONDS = 5

    def __init__(
        self,
        bootstrap_servers: str,
        acks: Union[int, str] = 1,  # 0 = no ack, 1 = leader ack, "all" = all replicas ack
        retries: int = 3,  # number of retries after a failed send
        batch_size: int = 16384,  # byte threshold for a batch (16 KB)
        linger_ms: int = 5,  # milliseconds to wait for more records to fill a batch
        max_request_size: int = 10485760,  # maximum request size in bytes (10 MB)
        api_version: Optional[tuple] = (3, 4, 0),  # adjust to match your Kafka version
    ):
        """
        Create the underlying Kafka producer.

        :param bootstrap_servers: Kafka address(es); separate several with commas,
            e.g. "192.168.1.100:9092,192.168.1.101:9092". Whitespace around each
            address and empty entries are ignored.
        :param acks: acknowledgement mode (0, 1 or "all")
        :param retries: retry count for failed sends
        :param batch_size: batch size threshold in bytes
        :param linger_ms: batching delay in milliseconds
        :param max_request_size: maximum request size in bytes
        :param api_version: Kafka API version tuple handed to ``KafkaProducer``
        :raises ValueError: if ``bootstrap_servers`` contains no address
        :raises NoBrokersAvailable: if the cluster cannot be reached
        """
        servers = self._parse_bootstrap_servers(bootstrap_servers)
        try:
            self.producer = KafkaProducer(
                bootstrap_servers=servers,
                acks=acks,
                retries=retries,
                batch_size=batch_size,
                linger_ms=linger_ms,
                max_request_size=max_request_size,
                # Serialize: Python object -> JSON text -> UTF-8 bytes.
                value_serializer=lambda v: json.dumps(v, ensure_ascii=False).encode("utf-8"),
                # A missing or empty key is sent as a null key.
                key_serializer=lambda k: k.encode("utf-8") if k else None,
                api_version=api_version
            )
            logger.info(f"Kafka 生产者初始化成功!地址:{bootstrap_servers}")
        except NoBrokersAvailable as e:
            logger.error(f"无法连接到 Kafka 集群:{e}")
            raise
        except Exception as e:
            logger.error(f"Kafka 生产者初始化失败:{e}")
            raise

    @staticmethod
    def _parse_bootstrap_servers(bootstrap_servers: str) -> List[str]:
        """Split a comma-separated address string into a clean list of addresses."""
        servers = [addr.strip() for addr in bootstrap_servers.split(",")]
        servers = [addr for addr in servers if addr]
        if not servers:
            raise ValueError("bootstrap_servers must contain at least one address")
        return servers

    def __enter__(self) -> "KafkaDataSender":
        return self

    def __exit__(self, exc_type, exc_value, traceback) -> None:
        # Always flush and close; exceptions from the with-body still propagate.
        self.close()

    def send_single_msg(
        self,
        topic: str,
        value: Dict,
        key: Optional[str] = None,
        partition: Optional[int] = None
    ) -> Any:
        """
        Send one message to a topic and wait for the broker acknowledgement.

        :param topic: target topic name
        :param value: payload to send (a dict)
        :param key: optional message key, used for partition routing
        :param partition: optional explicit partition; when omitted the producer picks one
        :return: the metadata object returned by the send future (exposes
            ``partition`` and ``offset``)
        :raises KafkaError: if the send fails or times out
        """
        try:
            # send() is asynchronous; get() blocks until the result is available.
            future = self.producer.send(
                topic=topic,
                value=value,
                key=key,
                partition=partition
            )
            result = future.get(timeout=self.SEND_TIMEOUT_SECONDS)
            logger.info(
                f"单条消息发送成功!Topic:{topic},Partition:{result.partition},Offset:{result.offset},Key:{key}"
            )
            return result
        except KafkaError as e:
            logger.error(f"单条消息发送失败!Topic:{topic},Key:{key},异常:{e}")
            raise
        except Exception as e:
            logger.error(f"单条消息发送异常!Topic:{topic},异常:{e}")
            raise

    def send_batch_msgs(
        self,
        topic: str,
        msg_list: List[Dict],
        keys: Optional[List[str]] = None
    ) -> Optional[List[Any]]:
        """
        Send several messages to one topic, letting the producer batch them.

        All records are queued first and only then awaited, so the producer can
        group them into batches (``linger_ms`` controls the delay).

        :param topic: target topic name
        :param msg_list: list of payloads (each one a dict)
        :param keys: optional list of keys; must have the same length as ``msg_list``
        :return: list of metadata objects in send order, or ``None`` when
            ``msg_list`` is empty
        :raises ValueError: if ``keys`` is given and its length differs from ``msg_list``
        :raises KafkaError: if any send fails or times out
        """
        if not msg_list:
            logger.warning("批量发送的消息列表为空,跳过")
            return None

        # Compare against None so an empty key list is also rejected instead of
        # silently sending every message without a key.
        if keys is not None and len(keys) != len(msg_list):
            raise ValueError("Key 列表长度必须和消息列表长度一致")

        try:
            futures = [
                self.producer.send(
                    topic=topic,
                    value=msg,
                    key=keys[idx] if keys is not None else None
                )
                for idx, msg in enumerate(msg_list)
            ]

            # Wait for every queued record to be acknowledged.
            results = []
            for idx, future in enumerate(futures):
                result = future.get(timeout=self.SEND_TIMEOUT_SECONDS)
                logger.debug(
                    f"批量消息 {idx+1} 发送成功!Topic:{topic},Partition:{result.partition},Offset:{result.offset}"
                )
                results.append(result)

            logger.info(f"批量发送完成!Topic:{topic},共发送 {len(msg_list)} 条消息")
            return results
        except KafkaError as e:
            logger.error(f"批量消息发送失败!Topic:{topic},异常:{e}")
            raise
        except Exception as e:
            logger.error(f"批量消息发送异常!Topic:{topic},异常:{e}")
            raise

    def close(self):
        """Flush pending messages and close the producer (best effort; errors are logged)."""
        try:
            try:
                self.producer.flush()  # push out everything still buffered
            finally:
                # Close even if flush() failed so the producer is not leaked.
                self.producer.close()
            logger.info("Kafka 生产者已关闭")
        except Exception as e:
            logger.error(f"关闭生产者失败:{e}")

def parse_mixed_dict(s: str) -> Dict[str, Any]:
    """
    Parse a Python dict literal whose ``data`` field may hold a JSON string.

    The text is evaluated with ``ast.literal_eval`` (literals only, no code
    execution). If the resulting dict has a ``data`` entry that is a string,
    that string is decoded as JSON and replaces the original value.

    :param s: text of a Python dict literal
    :return: the parsed dict, with ``data`` decoded when it was a JSON string
    :raises ValueError: if ``s`` is not a valid literal, is not a dict, or
        ``data`` is a string that is not valid JSON
    :raises SyntaxError: if ``s`` is not syntactically valid Python
    """
    # Step 1: Python dict literal -> dict
    obj = ast.literal_eval(s)
    if not isinstance(obj, dict):
        # Fail with a clear message instead of an AttributeError on .get below.
        raise ValueError(
            f"expected a dict literal, got {type(obj).__name__}"
        )

    # Step 2: JSON text in the "data" field -> Python object
    data = obj.get("data")
    if isinstance(data, str):
        obj["data"] = json.loads(data)

    return obj
if __name__ == "__main__":
    # 1. Build the sender. Replace the address with your own Kafka bootstrap server(s).
    sender = KafkaDataSender(
        bootstrap_servers="192.168.50.23:9092",
        acks=1,
        retries=3
    )
    base_dir = r'D:\BaiduSyncdisk\temp\2026年01月22日\files'
    try:
        # 2. Replay every file in base_dir: the file name is used as the topic and
        #    each non-empty line is parsed and sent as one message.
        for file in os.listdir(base_dir):
            file_path = os.path.join(base_dir, file)
            if not os.path.isfile(file_path):
                # Sub-directories cannot be opened as text files; skip them.
                continue
            with open(file_path, 'r', encoding='utf-8') as fd:
                for raw_line in fd:
                    line = raw_line.strip()
                    if not line:
                        # A blank line is skipped; it must not end the file early.
                        continue
                    line_obj = parse_mixed_dict(line)
                    sender.send_single_msg(topic=file, value=line_obj)
            logger.info(f'{file} 发送完成!')

        # Example: send a single JSON message to a chosen topic.
        # single_msg = {
        #     "acquisitionTime": "2026-01-22 11:56:19",
        #     "cityId": "16F37FCECE6E0ED1E0530100007F2F45",
        #     "deviceCode": "47fb1d5c0abd2a8893f636530faf0a036f284d402",
        #     "data": {"h2": -99999, "ch4": -99999}
        # }
        # sender.send_single_msg(
        #     topic="gyj_test",   # target topic
        #     value=single_msg,
        #     key="device_001"    # optional message key
        # )

        # Example: send several messages to one topic in a batch.
        # batch_msgs = [
        #     {"id": 1, "name": "Zhang San", "age": 25},
        #     {"id": 2, "name": "Li Si", "age": 30},
        #     {"id": 3, "name": "Wang Wu", "age": 35}
        # ]
        # sender.send_batch_msgs(
        #     topic="user_data",
        #     msg_list=batch_msgs,
        #     keys=["user_1", "user_2", "user_3"]  # optional, one key per message
        # )
    finally:
        # 3. Always flush and close the producer, even if parsing or sending failed.
        sender.close()

 

posted on 2026-01-22 22:02  狂自私  阅读(0)  评论(0)    收藏  举报