import ast
import json
import logging
import os
from typing import Any, Dict, List, Optional

from kafka import KafkaProducer
from kafka.errors import KafkaError, NoBrokersAvailable
# Configure logging (makes troubleshooting easier)
logging.basicConfig(
level=logging.INFO,
format="%(asctime)s - %(name)s - %(levelname)s - %(message)s"
)
logger = logging.getLogger("kafka_producer")
class KafkaDataSender:
"""Kafka 数据发送器(向指定 Topic 发送数据)"""
    def __init__(
        self,
        bootstrap_servers: str,
        acks: int = 1,                    # acknowledgment mode: 0 = none, 1 = leader only, all = all replicas
        retries: int = 3,                 # number of retries on send failure
        batch_size: int = 16384,          # batch size threshold in bytes (default 16 KB)
        linger_ms: int = 5,               # batching delay in ms (wait to accumulate more messages per batch)
        max_request_size: int = 10485760  # maximum size of a single message (default 10 MB)
    ):
        """
        Initialize the Kafka producer.
        :param bootstrap_servers: Kafka address(es), comma-separated if multiple, e.g. "192.168.1.100:9092,192.168.1.101:9092"
        :param acks: message acknowledgment mode
        :param retries: number of retries
        :param batch_size: batching threshold in bytes
        :param linger_ms: batching delay in milliseconds
        :param max_request_size: maximum size of a single message
        """
try:
            # Initialize the producer
self.producer = KafkaProducer(
bootstrap_servers=bootstrap_servers.split(","),
acks=acks,
retries=retries,
batch_size=batch_size,
linger_ms=linger_ms,
max_request_size=max_request_size,
                # Serialization: convert Python objects to JSON strings, then encode to bytes
value_serializer=lambda v: json.dumps(v, ensure_ascii=False).encode("utf-8"),
key_serializer=lambda k: k.encode("utf-8") if k else None,
                api_version=(3, 4, 0)  # adjust to match your Kafka broker version
)
logger.info(f"Kafka 生产者初始化成功!地址:{bootstrap_servers}")
        except NoBrokersAvailable as e:
            logger.error(f"Unable to connect to the Kafka cluster: {e}")
            raise
        except Exception as e:
            logger.error(f"Kafka producer initialization failed: {e}")
            raise
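    # Illustration of the serializers configured above (comment only, not executed;
    # the sample value and key are made up): a dict value and a str key become
    # UTF-8 bytes before being handed to Kafka, e.g.
    #   value {"h2": -99999}  ->  b'{"h2": -99999}'
    #   key   "device_001"    ->  b'device_001'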
def send_single_msg(
self,
topic: str,
value: Dict,
key: Optional[str] = None,
partition: Optional[int] = None
):
"""
发送单条消息到指定 Topic
:param topic: 目标 Topic 名称
:param value: 要发送的数据(字典格式)
:param key: 消息 Key(可选,用于分区路由)
:param partition: 指定发送到的分区(可选,不指定则按 Key 哈希分配)
"""
try:
            # Send the message (asynchronous; call get() on the future to block for the result)
future = self.producer.send(
topic=topic,
value=value,
key=key,
partition=partition
)
            # Block until the send completes, with a 5-second timeout
            result = future.get(timeout=5)
            logger.info(
                f"Message sent! Topic: {topic}, Partition: {result.partition}, Offset: {result.offset}, Key: {key}"
            )
        except KafkaError as e:
            logger.error(f"Failed to send message! Topic: {topic}, Key: {key}, error: {e}")
            raise
        except Exception as e:
            logger.error(f"Unexpected error while sending message! Topic: {topic}, error: {e}")
            raise
def send_batch_msgs(
self,
topic: str,
msg_list: List[Dict],
keys: Optional[List[str]] = None
):
"""
批量发送消息到指定 Topic(性能更高)
:param topic: 目标 Topic 名称
:param msg_list: 消息列表(每个元素是字典)
:param keys: 消息 Key 列表(可选,需和 msg_list 长度一致)
"""
if not msg_list:
logger.warning("批量发送的消息列表为空,跳过")
return
# 若指定 Key 列表,需确保长度匹配
if keys and len(keys) != len(msg_list):
raise ValueError("Key 列表长度必须和消息列表长度一致")
try:
            # Send the batch (the producer accumulates batches automatically; linger_ms controls the delay)
futures = []
for idx, msg in enumerate(msg_list):
key = keys[idx] if keys else None
future = self.producer.send(
topic=topic,
value=msg,
key=key
)
futures.append(future)
            # Wait for all messages to be delivered
for idx, future in enumerate(futures):
result = future.get(timeout=5)
                logger.debug(
                    f"Batch message {idx+1} sent! Topic: {topic}, Partition: {result.partition}, Offset: {result.offset}"
                )
            logger.info(f"Batch send complete! Topic: {topic}, {len(msg_list)} messages sent")
        except KafkaError as e:
            logger.error(f"Batch send failed! Topic: {topic}, error: {e}")
            raise
        except Exception as e:
            logger.error(f"Unexpected error during batch send! Topic: {topic}, error: {e}")
            raise
    def close(self):
        """Close the producer (making sure all pending messages are delivered first)."""
        try:
            self.producer.flush()  # flush the buffer so every pending message is actually sent
            self.producer.close()
            logger.info("Kafka producer closed")
        except Exception as e:
            logger.error(f"Failed to close the producer: {e}")
def parse_mixed_dict(s: str) -> Dict[str, Any]:
    """
    Safely parse:
    - a Python dict literal string
    - whose "data" field is itself a JSON string
    """
    # Step 1: Python dict string -> dict
    obj = ast.literal_eval(s)
    # Step 2: "data" field JSON string -> dict
    data = obj.get("data")
    if isinstance(data, str):
        obj["data"] = json.loads(data)
    return obj
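# Example of the record format parse_mixed_dict expects (hypothetical values,
# for illustration only):
#   parse_mixed_dict("{'deviceCode': 'abc', 'data': '{\"h2\": -99999}'}")
#   -> {'deviceCode': 'abc', 'data': {'h2': -99999}}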
if __name__ == "__main__":
    # 1. Initialize the sender
sender = KafkaDataSender(
bootstrap_servers="192.168.50.23:9092", # 替换为你的 Kafka 地址
acks=1,
retries=3
)
base_dir = r'D:\BaiduSyncdisk\temp\2026年01月22日\files'
# for file in os.listdir(base_dir):
# file_path = os.path.join(base_dir,file)
# with open(file_path,'r',encoding='utf-8')as fd:
# lines = fd.readlines()
# if lines[-1] == '':
# lines = lines[:-1]
# sender.send_batch_msgs(topic=file,msg_list=lines)
    #     logger.info(f'{file} sent!')
    for file in os.listdir(base_dir):
        file_path = os.path.join(base_dir, file)
        with open(file_path, 'r', encoding='utf-8') as fd:
            for line in fd:
                line = line.strip()
                if not line:
                    continue  # skip blank lines instead of stopping at the first one
                line_obj = parse_mixed_dict(line)
                sender.send_single_msg(topic=file, value=line_obj)
        logger.info(f'{file} sent!')
    # # 2. Example 1: send a single JSON message to a specified topic
# single_msg = {
# "acquisitionTime": "2026-01-22 11:56:19",
# "cityId": "16F37FCECE6E0ED1E0530100007F2F45",
# "deviceCode": "47fb1d5c0abd2a8893f636530faf0a036f284d402",
# "data": {"h2": -99999, "ch4": -99999}
# }
# sender.send_single_msg(
# topic="gyj_test", # 指定要发送的 Topic
# value=single_msg,
# key="device_001" # 可选:指定消息 Key
# )
    # # 3. Example 2: batch-send multiple messages to the same topic
# batch_msgs = [
# {"id": 1, "name": "张三", "age": 25},
# {"id": 2, "name": "李四", "age": 30},
# {"id": 3, "name": "王五", "age": 35}
# ]
# sender.send_batch_msgs(
# topic="user_data", # 另一个指定的 Topic
# msg_list=batch_msgs,
# keys=["user_1", "user_2", "user_3"] # 可选:每条消息的 Key
# )
    # # 4. Close the sender (run before the program exits)
sender.close()