1. 工具链角色与依赖说明
2. 环境准备与依赖安装
2.1 安装组件
# Core dependencies: the OpenTelemetry SDK and the OTLP/HTTP exporter.
pip install opentelemetry-sdk opentelemetry-exporter-otlp-proto-http
# ClickHouse driver (used to validate the connection and to verify stored data).
pip install clickhouse-driver
# Config file handling uses `configparser`, which ships with the Python 3
# standard library, so there is nothing to install for it.
# Log file monitoring (reads newly appended log lines in real time).
pip install watchdog
2.2 CK新建监控表
-- Run this statement manually in ClickHouse.
-- The database hyperdx_logs must already exist.
CREATE TABLE IF NOT EXISTS hyperdx_logs.clickhouse_info_logs
(
    timestamp     DateTime64(3)       CODEC(Delta, ZSTD), -- millisecond-precision timestamp
    severity_text String              CODEC(ZSTD),        -- log level (INFO / ERROR / ...)
    body          String              CODEC(ZSTD),        -- log message text
    service_name  String              CODEC(ZSTD),        -- service name (used for filtering in HyperDX)
    thread        String              CODEC(ZSTD),        -- thread information
    attributes    Map(String, String) CODEC(ZSTD)         -- extra attributes (OTLP-compatible)
)
ENGINE = MergeTree()
-- Sorted by time, then service name.
ORDER BY (timestamp, service_name);
3. 配置文件(config.ini)
# NOTE: every comment sits on its own line. Python's configparser does not
# strip inline comments by default, so "key = value  # note" would make the
# note part of the value.

[log_source]
# Path of the ClickHouse server log file (replace with the real path).
log_file_path = /var/log/clickhouse-server/clickhouse-server.log
# Log level to collect (INFO only).
target_level = INFO

[opentelemetry]
# OTLP/HTTP endpoint of the OpenTelemetry Collector. When using the collector
# hosted by HyperDX, put the HyperDX endpoint here. The collector script reads
# this key unconditionally, so it must be present.
# The full logs path (/v1/logs) is spelled out because an endpoint passed
# explicitly to the OTLP/HTTP exporter is used as-is.
otlp_endpoint = http://localhost:4318/v1/logs

[clickhouse]
# ClickHouse connection settings (HyperDX reads its data from this server).
host = localhost
port = 9000
user = default
password =
# Database that must be created beforehand.
database = hyperdx_logs
# Table that stores the logs.
table = clickhouse_info_logs

[hyperdx]
# Data-source identity reported to HyperDX (keep in sync with the HyperDX setup).
service_name = clickhouse-monitor
service_version = 1.0
4. Python 脚本实现(clickhouse_log_collector.py)
import os
import re
import sys
import time
import configparser
from datetime import datetime

from watchdog.observers import Observer
from watchdog.events import FileSystemEventHandler
from opentelemetry import trace
# The OpenTelemetry Python logs API/SDK live in underscore-prefixed modules.
from opentelemetry._logs import LogRecord, SeverityNumber, set_logger_provider
from opentelemetry.sdk._logs import LoggerProvider, LogRecordProcessor
from opentelemetry.sdk._logs.export import BatchLogRecordProcessor
from opentelemetry.sdk.resources import Resource
from opentelemetry.exporter.otlp.proto.http._log_exporter import OTLPLogExporter
from clickhouse_driver import Client  # only used to validate the connection; no DDL is issued


# DDL shown to the operator at start-up; this script never executes it.
CREATE_TABLE_SQL = """
CREATE TABLE IF NOT EXISTS hyperdx_logs.clickhouse_info_logs (
    timestamp DateTime64(3) CODEC(Delta, ZSTD),
    severity_text String CODEC(ZSTD),
    body String CODEC(ZSTD),
    service_name String CODEC(ZSTD),
    thread String CODEC(ZSTD),
    attributes Map(String, String) CODEC(ZSTD)
) ENGINE = MergeTree()
ORDER BY (timestamp, service_name)
"""

# Accepts both the simplified layout
#   2024.01.02 12:34:56.789 [thread] INFO message
# and the layout written by clickhouse-server
#   2024.01.02 12:34:56.789012 [ 1234 ] {query_id} <Information> message
_LOG_LINE_PATTERN = re.compile(
    r'(?P<timestamp>\d{4}\.\d{2}\.\d{2} \d{2}:\d{2}:\d{2}\.\d{3,6}) '
    r'\[(?P<thread>.*?)\] '
    r'(?:\{[^}]*\} )?'
    r'<?(?P<level>\w+)>? '
    r'(?P<message>.*)'
)

# strptime layout of the timestamp group above (%f accepts 1-6 digits).
_TIMESTAMP_FORMAT = "%Y.%m.%d %H:%M:%S.%f"

# clickhouse-server spells the level out; the config uses the short form.
_LEVEL_ALIASES = {"INFORMATION": "INFO"}

# Severity text -> OTLP severity number; unknown levels map to UNSPECIFIED.
_SEVERITY_BY_TEXT = {
    "TRACE": SeverityNumber.TRACE,
    "DEBUG": SeverityNumber.DEBUG,
    "INFO": SeverityNumber.INFO,
    "WARN": SeverityNumber.WARN,
    "WARNING": SeverityNumber.WARN,
    "ERROR": SeverityNumber.ERROR,
    "CRITICAL": SeverityNumber.FATAL,
    "FATAL": SeverityNumber.FATAL,
}

_LOGS_PATH = "/v1/logs"


def _normalize_level(level):
    """Return *level* upper-cased with known aliases folded to the short name."""
    upper = level.strip().upper()
    return _LEVEL_ALIASES.get(upper, upper)


class _LogTailHandler(FileSystemEventHandler):
    """Watchdog handler that tails one log file and forwards complete lines.

    The file is read in binary mode so that ``tell()`` is a real byte offset
    (needed to detect truncation). A trailing fragment without a newline is
    buffered until the rest of the line has been written.
    """

    def __init__(self, collector, log_path):
        super().__init__()
        self.collector = collector
        self.log_path = log_path
        self._pending = b""
        self.log_file = open(log_path, 'rb')
        # Start at the end of the file: only newly appended lines are processed.
        self.log_file.seek(0, os.SEEK_END)

    def on_modified(self, event):
        """Forward newly appended lines when the watched file changes."""
        if self._is_target(event):
            self._drain()

    def on_created(self, event):
        """Pick up a file re-created at the watched path (e.g. after rotation)."""
        if self._is_target(event):
            self._drain()

    def close(self):
        """Close the underlying file handle."""
        self.log_file.close()

    def _is_target(self, event):
        """Return True when *event* refers to the watched log file itself."""
        if event.is_directory:
            return False
        return os.path.abspath(os.fsdecode(event.src_path)) == self.log_path

    def _drain(self):
        """Read everything available, following rotation and truncation."""
        # In-place truncation: the file is now shorter than our read offset.
        if os.fstat(self.log_file.fileno()).st_size < self.log_file.tell():
            self.log_file.seek(0)
            self._pending = b""
        self._consume()
        if self._source_replaced():
            try:
                fresh = open(self.log_path, 'rb')
            except OSError:
                return  # the new file is not there yet; a later event retries
            self.log_file.close()
            self.log_file = fresh
            self._pending = b""
            self._consume()

    def _source_replaced(self):
        """Return True when the path now points at a different file than the open handle."""
        try:
            on_disk = os.stat(self.log_path)
        except OSError:
            return False
        opened = os.fstat(self.log_file.fileno())
        return (on_disk.st_dev, on_disk.st_ino) != (opened.st_dev, opened.st_ino)

    def _consume(self):
        """Read pending bytes and hand every complete line to the collector."""
        chunk = self.log_file.read()
        if not chunk:
            return
        # The last element is either b"" (data ended with a newline) or an
        # incomplete line that must wait for the next write.
        *complete, self._pending = (self._pending + chunk).split(b"\n")
        for raw in complete:
            parsed = self.collector._parse_log_line(raw.decode('utf-8', errors='replace'))
            if parsed:
                self.collector._emit_parsed(parsed)


class ClickHouseLogCollector:
    """Tail a ClickHouse server log and ship matching lines through OTLP.

    Only lines whose level equals ``[log_source] target_level`` are exported.
    The ClickHouse client is used solely to validate connectivity at start-up;
    this class never creates tables or writes rows.
    """

    def __init__(self, config_path):
        """Load *config_path* and initialise the OTLP logger and ClickHouse client.

        Raises:
            FileNotFoundError: the config file is missing or unreadable.
            ConnectionError: ClickHouse cannot be reached.
        """
        self.config = self._load_config(config_path)
        # Resource attributes attached to everything this process exports.
        self.resource = Resource(attributes={
            "service.name": self.config['hyperdx']['service_name'],
            "service.version": self.config['hyperdx']['service_version'],
            "log.source": "clickhouse-server"
        })
        self.logger_provider = self._init_otlp_logger()
        self.logger = self.logger_provider.get_logger(__name__)
        self.ch_client = self._init_clickhouse_client()
        self.log_pattern = _LOG_LINE_PATTERN

    def _load_config(self, config_path):
        """Read the INI file at *config_path* and return the parser.

        ``#`` preceded by whitespace starts an inline comment, so values such
        as ``table = clickhouse_info_logs  # note`` parse to the bare value.
        """
        if not os.path.exists(config_path):
            raise FileNotFoundError(f"配置文件 {config_path} 不存在")
        config = configparser.ConfigParser(inline_comment_prefixes=('#',))
        if not config.read(config_path, encoding='utf-8'):
            # read() silently skips files it cannot open.
            raise FileNotFoundError(f"配置文件 {config_path} 不存在")
        return config

    def _init_otlp_logger(self):
        """Create the logger provider with a batching OTLP/HTTP exporter.

        The API key is taken from ``[opentelemetry] api_key`` or, failing
        that, the ``HYPERDX_API_KEY`` environment variable; without a key no
        auth header is sent.
        """
        otel_cfg = self.config['opentelemetry']
        # An endpoint passed explicitly is used verbatim by the HTTP exporter,
        # so make sure it carries the logs signal path.
        endpoint = otel_cfg['otlp_endpoint'].strip().rstrip('/')
        if not endpoint.endswith(_LOGS_PATH):
            endpoint = f"{endpoint}{_LOGS_PATH}"
        api_key = (
            otel_cfg.get('api_key', fallback='').strip()
            or os.environ.get('HYPERDX_API_KEY', '')
        )
        # NOTE(review): header name kept from the original script; confirm it
        # against the HyperDX ingestion docs for your deployment.
        headers = {"x-hyperdx-api-key": api_key} if api_key else None
        otlp_exporter = OTLPLogExporter(endpoint=endpoint, headers=headers)
        logger_provider = LoggerProvider(resource=self.resource)
        logger_provider.add_log_record_processor(
            BatchLogRecordProcessor(otlp_exporter)
        )
        set_logger_provider(logger_provider)
        return logger_provider

    def _init_clickhouse_client(self):
        """Create the ClickHouse client and verify it with ``SELECT 1``.

        Raises:
            ConnectionError: the probe query failed.
        """
        ch_cfg = self.config['clickhouse']
        client = Client(
            host=ch_cfg['host'],
            port=int(ch_cfg['port']),
            user=ch_cfg['user'],
            password=ch_cfg['password'],
            database=ch_cfg['database']
        )
        try:
            client.execute("SELECT 1")
        except Exception as e:
            raise ConnectionError(f"ClickHouse连接失败: {str(e)}") from e
        print(f"成功连接到ClickHouse: {ch_cfg['host']}:{ch_cfg['port']}")
        print(f"请确保已手动创建表: {ch_cfg['database']}.{ch_cfg['table']}")
        return client

    def _parse_log_line(self, line):
        """Parse one log line.

        Returns:
            A dict with ``timestamp`` (naive ``datetime``), ``severity_text``,
            ``body``, ``thread`` and ``service_name``; or ``None`` when the
            line does not match the pattern, has a different level than the
            configured target, or carries an invalid timestamp.
        """
        match = self.log_pattern.match(line.strip())
        if not match:
            return None
        level = _normalize_level(match.group('level'))
        if level != _normalize_level(self.config['log_source']['target_level']):
            return None
        try:
            # Parse the dotted layout directly; rewriting the separators first
            # also rewrote the fractional-second dot and made parsing fail.
            timestamp = datetime.strptime(match.group('timestamp'), _TIMESTAMP_FORMAT)
        except ValueError:
            return None
        return {
            "timestamp": timestamp,
            "severity_text": level,
            "body": match.group('message'),
            "thread": match.group('thread').strip(),
            "service_name": self.config['hyperdx']['service_name']
        }

    def _emit_parsed(self, parsed):
        """Export one parsed line as an OpenTelemetry log record."""
        moment = parsed['timestamp']
        # The parsed datetime is naive, so timestamp() interprets it in this
        # host's local time zone. Integer arithmetic keeps sub-second digits exact.
        epoch_ns = (
            int(moment.replace(microsecond=0).timestamp()) * 1_000_000_000
            + moment.microsecond * 1_000
        )
        # NOTE(review): the logs API is still experimental; older SDK releases
        # may expect the SDK's own record type here - confirm against the
        # installed opentelemetry-sdk version.
        self.logger.emit(LogRecord(
            timestamp=epoch_ns,
            observed_timestamp=time.time_ns(),
            severity_text=parsed['severity_text'],
            severity_number=_SEVERITY_BY_TEXT.get(
                parsed['severity_text'], SeverityNumber.UNSPECIFIED
            ),
            body=parsed['body'],
            attributes={
                "thread": parsed['thread'],
                "service_name": parsed['service_name']
            }
        ))

    def start_monitoring(self):
        """Watch the log file and export new lines until interrupted.

        Blocks until Ctrl-C (or until the observer thread dies), then stops
        the observer, flushes the OTLP pipeline and closes the ClickHouse
        connection. Cleanup runs on any exit path.
        """
        configured_path = self.config['log_source']['log_file_path']
        # Watchdog reports absolute paths and needs a non-empty directory.
        log_path = os.path.abspath(configured_path)
        print(f"开始监控ClickHouse日志:{configured_path}(仅收集{self.config['log_source']['target_level']}级别)")
        event_handler = _LogTailHandler(self, log_path)
        observer = Observer()
        observer.schedule(event_handler, path=os.path.dirname(log_path), recursive=False)
        observer.start()
        try:
            while observer.is_alive():
                observer.join(timeout=5)
        except KeyboardInterrupt:
            pass
        finally:
            observer.stop()
            observer.join()  # no more callbacks after this point
            event_handler.close()
            self.logger_provider.shutdown()  # flushes records still queued in the batch processor
            self.ch_client.disconnect()
            print("程序已停止")


def main():
    """Print the required DDL, wait for confirmation, then start collecting.

    The config path defaults to ``config.ini`` and can be overridden by the
    first command-line argument.
    """
    print("=== 请先手动执行以下SQL创建ClickHouse表 ===")
    print(CREATE_TABLE_SQL)
    print("===========================================\n")
    # Only prompt when a terminal is attached; input() raises EOFError otherwise.
    if sys.stdin is not None and sys.stdin.isatty():
        input("按Enter继续(确保已创建表)...")
    config_path = sys.argv[1] if len(sys.argv) > 1 else "config.ini"
    collector = ClickHouseLogCollector(config_path)
    collector.start_monitoring()


if __name__ == "__main__":
    main()