深入解析:统一日志与链路追踪 Sleuth + Zipkin 实践

统一日志与链路追踪 Sleuth + Zipkin 实践

目录

  • 一、分布式追踪核心概念
  • 二、Sleuth 自动埋点机制
  • 三、Trace 上下文传播原理
  • 四、Zipkin 架构与数据流
  • ⚡ 五、消息中间件集成
  • 六、SkyWalking vs Jaeger 对比
  • 七、生产环境最佳实践

一、分布式追踪核心概念

分布式追踪的基本概念

调用链追踪核心元素

Trace - 完整调用链
Span - 单个工作单元
TraceId - 全局唯一标识
SpanId - 当前Span标识
ParentId - 父Span标识
Annotations - 时间点事件
Client Sent
Server Received
Server Sent
Client Received
Tags - 业务标签
HTTP方法
URL路径
状态码
错误信息

追踪数据模型

Span 数据结构定义

/**
 * Span data model: one unit of work inside a distributed trace.
 *
 * <p>Accessors, the builder and the all-args constructor are generated by Lombok
 * ({@code @Data}, {@code @Builder}, {@code @AllArgsConstructor}). The {@code sampled}
 * flag is declared last so the generated constructor keeps its leading parameter order.
 */
@Data
@Builder
@AllArgsConstructor
public class Span {
    // --- Identity ---
    private String traceId;          // ID shared by every span of one call chain
    private String spanId;           // ID of this span
    private String parentSpanId;     // ID of the parent span, used to rebuild the call tree
    private String name;             // operation name

    // --- Timing ---
    private long timestamp;          // start time; DefaultTracer stores System.currentTimeMillis() here
    private long duration;           // elapsed time; DefaultTracer.close() stores milliseconds

    // --- Context ---
    private Kind kind;               // CLIENT, SERVER, PRODUCER or CONSUMER
    private boolean shared;          // shared-span flag (forwarded to Zipkin by the reporter)
    private boolean debug;           // debug flag (forwarded to Zipkin by the reporter)

    // --- Endpoints ---
    private Endpoint localEndpoint;  // the service that recorded this span
    private Endpoint remoteEndpoint; // the peer service

    // --- Annotations and tags ---
    private List<Annotation> annotations;  // timestamped events
    private Map<String, String> tags;      // key/value labels

    // --- Status ---
    private boolean error;           // true when the operation failed
    private String errorMessage;     // failure description

    // --- Sampling ---
    // Read through isSampled() and set through builder().sampled(..) by the tracer,
    // the HTTP filter and the reporter.
    private boolean sampled;

    /**
     * Adds a tag, creating the tag map on first use.
     *
     * @param key   tag name; ignored when null
     * @param value tag value; ignored when null so that absent request data is never recorded
     * @return this span, to allow chaining
     */
    public Span tag(String key, String value) {
        if (key == null || value == null) {
            return this;
        }
        if (tags == null) {
            tags = new java.util.LinkedHashMap<>();
        }
        tags.put(key, value);
        return this;
    }

    /**
     * Appends a timestamped event, creating the annotation list on first use.
     *
     * @param annotation the event to record; ignored when null
     * @return this span, to allow chaining
     */
    public Span annotation(Annotation annotation) {
        if (annotation == null) {
            return this;
        }
        if (annotations == null) {
            annotations = new java.util.ArrayList<>();
        }
        annotations.add(annotation);
        return this;
    }

    /**
     * Span kind.
     */
    public enum Kind {
        CLIENT,     // outbound call
        SERVER,     // inbound request handling
        PRODUCER,   // message producer
        CONSUMER    // message consumer
    }

    /**
     * Network identity of a service.
     */
    @Data
    @AllArgsConstructor
    public static class Endpoint {
        private String serviceName;  // service name
        private String ip;           // IP address
        private int port;            // port number
    }

    /**
     * Timestamped event attached to a span.
     */
    @Data
    @AllArgsConstructor
    public static class Annotation {
        private long timestamp;     // event time
        private String value;       // event label, e.g. "sr" / "ss" / "cs" / "cr"
    }
}

二、Sleuth 自动埋点机制

️ Sleuth 自动配置架构

Sleuth 自动埋点组件

/**
 * Auto-configuration that registers the tracing beans: sampler, trace context,
 * tracer, the inbound HTTP filter, the async executor wrapper and the
 * RestTemplate interceptor.
 */
@Configuration
@EnableAspectJAutoProxy
@Slf4j
public class SleuthAutoConfiguration {

    /** Fallback sampler: samples every request. */
    @Bean
    @ConditionalOnMissingBean
    public Sampler sampler() {
        return Sampler.ALWAYS_SAMPLE;
    }

    /** Fallback trace context. */
    @Bean
    @ConditionalOnMissingBean
    public TraceContext traceContext() {
        // NOTE(review): DefaultTraceContext is not defined in this article; confirm it exists.
        return new DefaultTraceContext();
    }

    /** Fallback tracer built from the sampler and the trace context. */
    @Bean
    @ConditionalOnMissingBean
    public Tracer tracer(Sampler sampler, TraceContext traceContext) {
        return new DefaultTracer(sampler, traceContext);
    }

    /**
     * Filter for inbound HTTP requests.
     */
    @Bean
    public TracingFilter tracingFilter(Tracer tracer) {
        return new TracingFilter(tracer);
    }

    /**
     * Executor wrapper that carries tracing into async tasks.
     */
    @Bean
    @ConditionalOnMissingBean
    public TracingAsyncTaskExecutor tracingAsyncTaskExecutor(Tracer tracer) {
        // NOTE(review): confirm TracingAsyncTaskExecutor offers a single-Tracer constructor.
        return new TracingAsyncTaskExecutor(tracer);
    }

    /**
     * Customizer that appends the tracing interceptor to every RestTemplate it is applied to.
     */
    @Bean
    public RestTemplateCustomizer restTemplateCustomizer(Tracer tracer) {
        return template -> {
            // Copy first, so the list handed back to the template is one we own.
            List<ClientHttpRequestInterceptor> chain = new ArrayList<>(template.getInterceptors());
            chain.add(new TracingClientHttpRequestInterceptor(tracer));
            template.setInterceptors(chain);
        };
    }
}
    /**
     * Default {@link Tracer}: creates root and child spans and closes them.
     *
     * <p>IDs are random hex strings: 16 bytes for a trace ID, 8 bytes for a span ID.
     */
    @Component
    @Slf4j
    public class DefaultTracer implements Tracer {
        private static final int TRACE_ID_BYTES = 16;
        private static final int SPAN_ID_BYTES = 8;

        private final Sampler sampler;
        private final TraceContext traceContext;

        /**
         * @param sampler      decides whether a new trace is recorded; must not be null
         * @param traceContext holder of the current span; must not be null
         * @throws NullPointerException if either argument is null
         */
        public DefaultTracer(Sampler sampler, TraceContext traceContext) {
            this.sampler = java.util.Objects.requireNonNull(sampler, "sampler");
            this.traceContext = java.util.Objects.requireNonNull(traceContext, "traceContext");
        }

        /**
         * Returns the trace context this tracer reads the current span from.
         * The HTTP filter and the Kafka producer wrapper obtain it through the tracer.
         */
        public TraceContext getTraceContext() {
            return traceContext;
        }

        /**
         * Creates a child of the current span, or a root span when no span is current.
         */
        @Override
        public Span nextSpan() {
            return nextSpan(traceContext.getCurrentSpan());
        }

        /**
         * Creates a child of {@code parent}, or a root span when {@code parent} is null.
         */
        @Override
        public Span nextSpan(Span parent) {
            return parent == null ? createRootSpan() : createChildSpan(parent);
        }

        /**
         * Starts a new trace. When the sampler declines, the returned span carries
         * only {@code sampled=false} and no IDs.
         */
        private Span createRootSpan() {
            if (!sampler.isSampled()) {
                return Span.builder().sampled(false).build();
            }
            return Span.builder()
                    .traceId(randomHex(TRACE_ID_BYTES))
                    .spanId(randomHex(SPAN_ID_BYTES))
                    .parentSpanId(null)
                    .name("root")
                    .timestamp(System.currentTimeMillis())
                    .kind(Span.Kind.SERVER)
                    .sampled(true)
                    .build();
        }

        /**
         * Creates a span in the parent's trace. The sampling decision is inherited:
         * an unsampled parent yields an unsampled, ID-less child.
         */
        private Span createChildSpan(Span parent) {
            if (!parent.isSampled()) {
                return Span.builder().sampled(false).build();
            }
            return Span.builder()
                    .traceId(parent.getTraceId())
                    .spanId(randomHex(SPAN_ID_BYTES))
                    .parentSpanId(parent.getSpanId())
                    .name("child")
                    .timestamp(System.currentTimeMillis())
                    .kind(Span.Kind.CLIENT)
                    .sampled(true)
                    .localEndpoint(parent.getLocalEndpoint())
                    .build();
        }

        /**
         * Returns {@code byteCount} random bytes as a hex string.
         * ThreadLocalRandom avoids contention on one shared generator across request threads.
         */
        private static String randomHex(int byteCount) {
            byte[] bytes = new byte[byteCount];
            java.util.concurrent.ThreadLocalRandom.current().nextBytes(bytes);
            return Hex.encodeHexString(bytes);
        }

        /**
         * Finishes a sampled span: stores the elapsed milliseconds since its start
         * timestamp and hands it to the reporter. Null and unsampled spans are ignored.
         */
        @Override
        public void close(Span span) {
            if (span == null || !span.isSampled()) {
                return;
            }
            span.setDuration(System.currentTimeMillis() - span.getTimestamp());
            log.debug("Span关闭: traceId={}, spanId={}, duration={}ms",
                    span.getTraceId(), span.getSpanId(), span.getDuration());
            // NOTE(review): reportSpan is not defined in this class; confirm where it comes from.
            reportSpan(span);
        }
    }

HTTP 请求自动追踪

HTTP 拦截器实现

/**
 * Servlet filter that traces inbound HTTP requests.
 *
 * <p>It continues the trace described by the B3 request headers, or starts a new
 * one, makes the span current for the duration of the request, echoes the trace
 * headers on the response and closes the span afterwards.
 */
@Component
@Slf4j
public class TracingFilter implements Filter {
    private static final String TRACE_HEADER = "X-B3-TraceId";
    private static final String SPAN_HEADER = "X-B3-SpanId";
    private static final String PARENT_HEADER = "X-B3-ParentSpanId";
    private static final String SAMPLED_HEADER = "X-B3-Sampled";

    private final Tracer tracer;

    public TracingFilter(Tracer tracer) {
        this.tracer = tracer;
    }

    /**
     * Wraps the rest of the filter chain in a server span.
     *
     * @throws IOException      rethrown from the chain after being recorded on the span
     * @throws ServletException rethrown from the chain after being recorded on the span
     */
    @Override
    public void doFilter(ServletRequest request, ServletResponse response,
                         FilterChain chain) throws IOException, ServletException {
        // Only HTTP traffic carries the headers this filter works with.
        if (!(request instanceof HttpServletRequest) || !(response instanceof HttpServletResponse)) {
            chain.doFilter(request, response);
            return;
        }
        HttpServletRequest httpRequest = (HttpServletRequest) request;
        HttpServletResponse httpResponse = (HttpServletResponse) response;

        Span span = extractOrCreateSpan(httpRequest);
        try {
            tracer.getTraceContext().setCurrentSpan(span);
            // Headers are written before the chain runs, while the response is still uncommitted.
            addTracingHeaders(httpResponse, span);
            logServerReceived(span, httpRequest);
            chain.doFilter(request, response);
            logServerSent(span, httpResponse);
        } catch (IOException | ServletException | RuntimeException e) {
            span.setError(true);
            span.setErrorMessage(e.getMessage());
            span.tag("error", "true");
            throw e;
        } finally {
            tracer.close(span);
            tracer.getTraceContext().clear();
        }
    }

    /**
     * Builds the server span from the B3 headers when both trace ID and span ID
     * are present; otherwise asks the tracer for a new span.
     */
    private Span extractOrCreateSpan(HttpServletRequest request) {
        String traceId = request.getHeader(TRACE_HEADER);
        String spanId = request.getHeader(SPAN_HEADER);
        String operation = request.getMethod() + " " + request.getRequestURI();

        if (traceId != null && spanId != null) {
            return Span.builder()
                    .traceId(traceId)
                    .spanId(spanId)
                    .parentSpanId(request.getHeader(PARENT_HEADER))
                    .sampled(isSampledFlag(request.getHeader(SAMPLED_HEADER)))
                    .kind(Span.Kind.SERVER)
                    .timestamp(System.currentTimeMillis())
                    .name(operation)
                    .localEndpoint(buildEndpoint(request))
                    .build();
        }

        // Span is a Lombok @Data class, so it is configured through its setters.
        Span span = tracer.nextSpan();
        span.setName(operation);
        span.setKind(Span.Kind.SERVER);
        // NOTE(review): buildEndpoint is not defined in this class; confirm where it comes from.
        span.setLocalEndpoint(buildEndpoint(request));
        return span;
    }

    /**
     * Interprets an X-B3-Sampled value: "1" and the legacy spelling "true" mean sampled.
     */
    private static boolean isSampledFlag(String headerValue) {
        return "1".equals(headerValue) || "true".equalsIgnoreCase(headerValue);
    }

    /**
     * Echoes the trace identifiers on the response for sampled spans.
     */
    private void addTracingHeaders(HttpServletResponse response, Span span) {
        if (!span.isSampled()) {
            return;
        }
        response.setHeader(TRACE_HEADER, span.getTraceId());
        response.setHeader(SPAN_HEADER, span.getSpanId());
        response.setHeader(SAMPLED_HEADER, "1");
    }

    /**
     * Records the "sr" (server received) event and the HTTP request tags.
     */
    private void logServerReceived(Span span, HttpServletRequest request) {
        if (!span.isSampled()) {
            return;
        }
        span.annotation(new Span.Annotation(System.currentTimeMillis(), "sr"));
        span.tag("http.method", request.getMethod());
        span.tag("http.path", request.getRequestURI());
        span.tag("http.host", request.getServerName());
        // The User-Agent header is optional; a null tag value must not reach the reporter.
        String userAgent = request.getHeader("User-Agent");
        if (userAgent != null) {
            span.tag("http.user_agent", userAgent);
        }
        log.debug("服务器接收请求: {} {}, traceId: {}",
                request.getMethod(), request.getRequestURI(), span.getTraceId());
    }

    /**
     * Records the "ss" (server sent) event and the response status.
     */
    private void logServerSent(Span span, HttpServletResponse response) {
        if (!span.isSampled()) {
            return;
        }
        span.annotation(new Span.Annotation(System.currentTimeMillis(), "ss"));
        span.tag("http.status_code", String.valueOf(response.getStatus()));
        log.debug("服务器发送响应: status={}, traceId={}",
                response.getStatus(), span.getTraceId());
    }
}

三、Trace 上下文传播原理

上下文传播机制

Trace 上下文跨服务传播

/**
 * Holder of the per-thread tracing state: the current span plus a map of extra
 * key/value context. Also converts that state to and from B3 HTTP headers.
 */
@Component
@Slf4j
public class TraceContext {
    private final ThreadLocal<Span> currentSpan = new ThreadLocal<>();
    private final ThreadLocal<Map<String, String>> extraContext = new ThreadLocal<>();

    /**
     * Returns the span bound to the calling thread, or null when none is bound.
     */
    public Span getCurrentSpan() {
        return currentSpan.get();
    }

    /**
     * Binds {@code span} to the calling thread; null unbinds the value.
     */
    public void setCurrentSpan(Span span) {
        currentSpan.set(span);
        if (span != null) {
            log.debug("设置当前Span: traceId={}, spanId={}",
                    span.getTraceId(), span.getSpanId());
        }
    }

    /**
     * Returns the calling thread's extra-context map, creating it on first access.
     * The cross-thread wrappers copy entries out of and into this map.
     */
    public Map<String, String> getExtraContext() {
        Map<String, String> extras = extraContext.get();
        if (extras == null) {
            extras = new java.util.HashMap<>();
            extraContext.set(extras);
        }
        return extras;
    }

    /**
     * Removes the current span and the extra context from the calling thread.
     */
    public void clear() {
        currentSpan.remove();
        Map<String, String> extras = extraContext.get();
        if (extras != null) {
            extras.clear();
        }
        extraContext.remove();
    }

    /**
     * Writes the current span's B3 headers into {@code headers}.
     * Nothing is written when there is no current span or it is unsampled.
     */
    public void inject(HttpHeaders headers) {
        Span span = getCurrentSpan();
        if (span == null || !span.isSampled()) {
            return;
        }
        headers.set("X-B3-TraceId", span.getTraceId());
        headers.set("X-B3-SpanId", span.getSpanId());
        // A root span has no parent; omit the header instead of sending a null value.
        if (span.getParentSpanId() != null) {
            headers.set("X-B3-ParentSpanId", span.getParentSpanId());
        }
        headers.set("X-B3-Sampled", "1");
        // NOTE(review): injectCustomContext is not defined in this class; confirm where it comes from.
        injectCustomContext(headers);
    }

    /**
     * Builds a span from B3 headers.
     *
     * @return the span, or null when the trace ID or span ID header is missing
     */
    public Span extract(HttpHeaders headers) {
        String traceId = headers.getFirst("X-B3-TraceId");
        String spanId = headers.getFirst("X-B3-SpanId");
        if (traceId == null || spanId == null) {
            return null;
        }
        return Span.builder()
                .traceId(traceId)
                .spanId(spanId)
                .parentSpanId(headers.getFirst("X-B3-ParentSpanId"))
                .sampled("1".equals(headers.getFirst("X-B3-Sampled")))
                .build();
    }
}
    /**
     * Carries the tracing context across threads by wrapping tasks: the context is
     * captured when the task is wrapped and installed on whichever thread runs it.
     */
    @Component
    @Slf4j
    public class TraceContextExecutor {
        private final TraceContext traceContext;

        public TraceContextExecutor(TraceContext traceContext) {
            this.traceContext = java.util.Objects.requireNonNull(traceContext, "traceContext");
        }

        /**
         * Wraps a {@link Runnable} so that it runs with the caller's tracing context.
         */
        public Runnable wrap(Runnable task) {
            final Span capturedSpan = traceContext.getCurrentSpan();
            final Map<String, String> capturedExtras = copyExtraContext();
            return () -> {
                // Remember what the executing thread had: with a caller-runs or direct
                // executor this is the submitting thread's own live context.
                final Span previousSpan = traceContext.getCurrentSpan();
                final Map<String, String> previousExtras = copyExtraContext();
                try {
                    install(capturedSpan, capturedExtras);
                    task.run();
                } finally {
                    install(previousSpan, previousExtras);
                }
            };
        }

        /**
         * Wraps a {@link Callable} so that it runs with the caller's tracing context.
         */
        public <T> Callable<T> wrap(Callable<T> task) {
            final Span capturedSpan = traceContext.getCurrentSpan();
            final Map<String, String> capturedExtras = copyExtraContext();
            return () -> {
                final Span previousSpan = traceContext.getCurrentSpan();
                final Map<String, String> previousExtras = copyExtraContext();
                try {
                    install(capturedSpan, capturedExtras);
                    return task.call();
                } finally {
                    install(previousSpan, previousExtras);
                }
            };
        }

        /** Snapshots the calling thread's extra context; null when there is nothing to copy. */
        private Map<String, String> copyExtraContext() {
            Map<String, String> extras = traceContext.getExtraContext();
            if (extras == null || extras.isEmpty()) {
                return null;
            }
            return new java.util.HashMap<>(extras);
        }

        /** Replaces the calling thread's context with the given span and extras. */
        private void install(Span span, Map<String, String> extras) {
            traceContext.clear();
            if (span != null) {
                traceContext.setCurrentSpan(span);
            }
            if (extras != null) {
                traceContext.getExtraContext().putAll(extras);
            }
        }

        /**
         * {@link AsyncTaskExecutor} decorator that wraps every submitted task
         * before handing it to the delegate.
         */
        @Component
        public static class TracingAsyncTaskExecutor implements AsyncTaskExecutor {
            private final AsyncTaskExecutor delegate;
            private final TraceContextExecutor contextExecutor;

            public TracingAsyncTaskExecutor(AsyncTaskExecutor delegate,
                                            TraceContextExecutor contextExecutor) {
                this.delegate = java.util.Objects.requireNonNull(delegate, "delegate");
                this.contextExecutor =
                        java.util.Objects.requireNonNull(contextExecutor, "contextExecutor");
            }

            @Override
            public void execute(Runnable task) {
                delegate.execute(contextExecutor.wrap(task));
            }

            @Override
            public void execute(Runnable task, long startTimeout) {
                delegate.execute(contextExecutor.wrap(task), startTimeout);
            }

            @Override
            public Future<?> submit(Runnable task) {
                return delegate.submit(contextExecutor.wrap(task));
            }

            @Override
            public <T> Future<T> submit(Callable<T> task) {
                return delegate.submit(contextExecutor.wrap(task));
            }
        }
    }

Feign 客户端集成

Feign 客户端追踪集成

/**
 * Feign request interceptor that opens a client span for each outbound call and
 * writes the B3 headers onto the request. The nested logger closes the span
 * when the response arrives.
 */
@Component
@Slf4j
public class TracingFeignInterceptor implements RequestInterceptor {
    private final Tracer tracer;
    private final TraceContext traceContext;

    public TracingFeignInterceptor(Tracer tracer, TraceContext traceContext) {
        this.tracer = tracer;
        this.traceContext = traceContext;
    }

    /**
     * Starts a client span under the current span and propagates it.
     * Does nothing when there is no current span or it is unsampled.
     */
    @Override
    public void apply(RequestTemplate template) {
        Span currentSpan = traceContext.getCurrentSpan();
        if (currentSpan == null || !currentSpan.isSampled()) {
            return;
        }

        // Span is a Lombok @Data class, so it is configured through its setters.
        Span clientSpan = tracer.nextSpan(currentSpan);
        clientSpan.setName("feign:" + template.method() + " " + template.url());
        clientSpan.setKind(Span.Kind.CLIENT);
        clientSpan.setTimestamp(System.currentTimeMillis());

        // "cs" = client sent
        clientSpan.annotation(new Span.Annotation(System.currentTimeMillis(), "cs"));

        template.header("X-B3-TraceId", clientSpan.getTraceId());
        template.header("X-B3-SpanId", clientSpan.getSpanId());
        template.header("X-B3-ParentSpanId", currentSpan.getSpanId());
        template.header("X-B3-Sampled", "1");

        clientSpan.tag("http.method", template.method());
        clientSpan.tag("http.url", template.url());
        clientSpan.tag("component", "feign");

        // Stash the span where the response-side logger looks for it.
        // NOTE(review): confirm RequestTemplate exposes attribute(..) in the Feign version in use.
        template.attribute("clientSpan", clientSpan);
        log.debug("Feign请求追踪: {} {}, traceId: {}",
                template.method(), template.url(), clientSpan.getTraceId());
    }

    /**
     * Feign logger that finishes the client span on the response path.
     */
    @Component
    public class TracingFeignLogger extends feign.Logger {

        /**
         * Feign supplies {@link String#format}-style patterns, so the message is
         * formatted here before being handed to SLF4J.
         */
        @Override
        protected void log(String configKey, String format, Object... args) {
            if (log.isDebugEnabled()) {
                log.debug(String.format(methodTag(configKey) + format, args));
            }
        }

        @Override
        protected void logRequest(String configKey, Level logLevel, Request request) {
            if (log.isDebugEnabled()) {
                super.logRequest(configKey, logLevel, request);
            }
        }

        /**
         * Records the "cr" (client received) event and the status code on the
         * stashed client span, closes it, then delegates to the default logging.
         */
        @Override
        protected Response logAndRebufferResponse(String configKey, Level logLevel,
                Response response, long elapsedTime) throws IOException {
            Request request = response.request();
            Span clientSpan = (Span) request.requestTemplate().attribute("clientSpan");
            if (clientSpan != null) {
                clientSpan.annotation(new Span.Annotation(System.currentTimeMillis(), "cr"));
                clientSpan.tag("http.status_code", String.valueOf(response.status()));
                // Kept in milliseconds like every other duration written to Span;
                // the Zipkin reporter performs the conversion to microseconds.
                clientSpan.setDuration(elapsedTime);
                tracer.close(clientSpan);
            }
            return super.logAndRebufferResponse(configKey, logLevel, response, elapsedTime);
        }
    }
}

四、Zipkin 架构与数据流

️ Zipkin 系统架构

Zipkin 数据流架构

应用程序
Span数据收集
Zipkin Collector
Storage 存储
Zipkin Query Service
Zipkin UI
消息队列 Kafka/RabbitMQ
Elasticsearch
MySQL
Cassandra

Zipkin 集成配置

Spring Cloud Sleuth Zipkin 配置

# application.yml - Zipkin configuration
# NOTE(review): nesting was lost in the original and is restored from the comment
# grouping; confirm the hierarchy and key names against the Sleuth/Zipkin reference.
spring:
  zipkin:
    # Zipkin server address
    base-url: http://zipkin-server:9411
    # Enable reporting to Zipkin
    enabled: true
    # Service name
    service:
      name: user-service
    # Server location
    location:
      # Resolve through service discovery
      discovery:
        enabled: true
      # Or specify the address directly
      host: localhost
      port: 9411
    # Sender settings
    sender:
      type: web # supports web, kafka, rabbit
    # Compression
    compression:
      enabled: true
    # Connection settings
    connect-timeout: 5000
    read-timeout: 10000
  sleuth:
    # Sampling rate
    sampler:
      probability: 1.0 # 1.0 means 100% sampling
    # HTTP request tracing
    web:
      client:
        enabled: true
      # Paths to skip
      skip-pattern: /health,/info
    # Async tracing
    async:
      enabled: true
    # Messaging tracing
    messaging:
      enabled: true
    # Scheduled-task tracing
    schedule:
      enabled: true
    # Redis tracing
    redis:
      enabled: true
    # Database tracing
    jdbc:
      enabled: true
# Advanced Zipkin client settings
management:
  endpoints:
    web:
      exposure:
        include: health,info,metrics,zipkin
  endpoint:
    zipkin:
      enabled: true
  metrics:
    export:
      zipkin:
        enabled: true
# Logging (shows the trace ID)
logging:
  pattern:
    level: "%5p [${spring.zipkin.service.name:},%X{traceId:-},%X{spanId:-}]"

Zipkin 报告器实现

Zipkin Span 报告器

/**
* Zipkin Span 报告器
* 负责将Span数据发送到Zipkin服务器
*/
@Component
@Slf4j
public class ZipkinSpanReporter implements SpanReporter {
private final ZipkinRestTemplateSender sender;
private final ObjectMapper objectMapper;
private final MeterRegistry meterRegistry;
// 指标监控
private final Counter spansSentCounter;
private final Counter spansDroppedCounter;
private final Timer sendTimer;
public ZipkinSpanReporter(ZipkinRestTemplateSender sender,
ObjectMapper objectMapper,
MeterRegistry meterRegistry) {
this.sender = sender;
this.objectMapper = objectMapper;
this.meterRegistry = meterRegistry;
// 初始化指标
this.spansSentCounter = meterRegistry.counter("zipkin.spans.sent");
this.spansDroppedCounter = meterRegistry.counter("zipkin.spans.dropped");
this.sendTimer = meterRegistry.timer("zipkin.send.duration");
}
@Override
public void report(Span span) {
if (!span.isSampled()) {
spansDroppedCounter.increment();
return;
}
try {
// 转换Span为Zipkin格式
zipkin2.Span zipkinSpan = convertToZipkinSpan(span);
// 发送Span到Zipkin
sendTimer.record(() -> {
try {
sender.sendSpans(Collections.singletonList(zipkinSpan));
spansSentCounter.increment();
if (log.isDebugEnabled()) {
log.debug("Span发送成功: traceId={}, spanId={}",
span.getTraceId(), span.getSpanId());
}
} catch (Exception e) {
log.error("Span发送失败", e);
spansDroppedCounter.increment();
}
});
} catch (Exception e) {
log.error("Span转换失败", e);
spansDroppedCounter.increment();
}
}
/**
* 转换内部Span为Zipkin格式
*/
private zipkin2.Span convertToZipkinSpan(Span span) {
zipkin2.Span.Builder builder = zipkin2.Span.newBuilder()
.traceId(span.getTraceId())
.id(span.getSpanId())
.name(span.getName())
.timestamp(span.getTimestamp() * 1000) // 转换为微秒
.duration(span.getDuration() * 1000);  // 转换为微秒
// 设置父Span
if (span.getParentSpanId() != null) {
builder.parentId(span.getParentSpanId());
}
// 设置本地端点
if (span.getLocalEndpoint() != null) {
builder.localEndpoint(zipkin2.Endpoint.newBuilder()
.serviceName(span.getLocalEndpoint().getServiceName())
.ip(span.getLocalEndpoint().getIp())
.port(span.getLocalEndpoint().getPort())
.build());
}
// 设置远程端点
if (span.getRemoteEndpoint() != null) {
builder.remoteEndpoint(zipkin2.Endpoint.newBuilder()
.serviceName(span.getRemoteEndpoint().getServiceName())
.ip(span.getRemoteEndpoint().getIp())
.port(span.getRemoteEndpoint().getPort())
.build());
}
// 添加注解
if (span.getAnnotations() != null) {
for (Annotation annotation : span.getAnnotations()) {
builder.addAnnotation(annotation.getTimestamp() * 1000, annotation.getValue());
}
}
// 添加标签
if (span.getTags() != null) {
for (Map.Entry<String, String> tag : span.getTags().entrySet()) {
  builder.putTag(tag.getKey(), tag.getValue());
  }
  }
  // 设置Kind
  if (span.getKind() != null) {
  switch (span.getKind()) {
  case CLIENT:
  builder.kind(zipkin2.Span.Kind.CLIENT);
  break;
  case SERVER:
  builder.kind(zipkin2.Span.Kind.SERVER);
  break;
  case PRODUCER:
  builder.kind(zipkin2.Span.Kind.PRODUCER);
  break;
  case CONSUMER:
  builder.kind(zipkin2.Span.Kind.CONSUMER);
  break;
  }
  }
  // 设置共享标志
  if (span.isShared()) {
  builder.shared(true);
  }
  // 设置调试标志
  if (span.isDebug()) {
  builder.debug(true);
  }
  return builder.build();
  }
  }
  /**
  * Zipkin REST API 发送器
  */
  @Component
  @Slf4j
  public class ZipkinRestTemplateSender {
  private final RestTemplate restTemplate;
  private final String zipkinBaseUrl;
  private final ObjectMapper objectMapper;
  public ZipkinRestTemplateSender(RestTemplate restTemplate,
  @Value("${spring.zipkin.base-url}") String zipkinBaseUrl,
  ObjectMapper objectMapper) {
  this.restTemplate = restTemplate;
  this.zipkinBaseUrl = zipkinBaseUrl;
  this.objectMapper = objectMapper;
  }
  /**
  * 批量发送Span到Zipkin
  */
  public void sendSpans(List<zipkin2.Span> spans) {
    if (spans.isEmpty()) {
    return;
    }
    try {
    // 序列化Span列表
    String jsonSpans = objectMapper.writeValueAsString(spans);
    // 构建请求
    HttpHeaders headers = new HttpHeaders();
    headers.setContentType(MediaType.APPLICATION_JSON);
    headers.set("Content-Encoding", "gzip");
    HttpEntity<byte[]> request = new HttpEntity<>(
      gzipCompress(jsonSpans), headers);
      // 发送到Zipkin
      ResponseEntity<String> response = restTemplate.postForEntity(
        zipkinBaseUrl + "/api/v2/spans", request, String.class);
        if (!response.getStatusCode().is2xxSuccessful()) {
        log.warn("Zipkin响应异常: {}", response.getStatusCode());
        }
        } catch (Exception e) {
        throw new RuntimeException("发送Span到Zipkin失败", e);
        }
        }
        /**
        * GZIP压缩
        */
        private byte[] gzipCompress(String data) throws IOException {
        ByteArrayOutputStream bos = new ByteArrayOutputStream(data.length());
        GZIPOutputStream gzip = new GZIPOutputStream(bos);
        gzip.write(data.getBytes(StandardCharsets.UTF_8));
        gzip.close();
        return bos.toByteArray();
        }
        }

⚡ 五、消息中间件集成

Kafka 消息追踪

Kafka 消息追踪集成

/**
 * Kafka tracing configuration: exposes producer and consumer factories that
 * wrap the application's own factories together with the tracer.
 */
@Configuration
@Slf4j
public class KafkaTracingConfiguration {

    /** Producer factory decorated with tracing. */
    @Bean
    public TracingProducerFactory<String, String> tracingProducerFactory(
            ProducerFactory<String, String> producerFactory,
            Tracer tracer) {
        TracingProducerFactory<String, String> tracingFactory =
                new TracingProducerFactory<>(producerFactory, tracer);
        return tracingFactory;
    }

    /** Consumer factory decorated with tracing. */
    @Bean
    public TracingConsumerFactory<String, String> tracingConsumerFactory(
            ConsumerFactory<String, String> consumerFactory,
            Tracer tracer) {
        TracingConsumerFactory<String, String> tracingFactory =
                new TracingConsumerFactory<>(consumerFactory, tracer);
        return tracingFactory;
    }
}
            /**
             * {@link ProducerFactory} decorator whose producers write the B3 trace
             * headers into every record they send.
             */
            @Component
            @Slf4j
            public class TracingProducerFactory<K, V> implements ProducerFactory<K, V> {
                private final ProducerFactory<K, V> delegate;
                private final Tracer tracer;

                /**
                 * @param delegate factory that creates the real producers
                 * @param tracer   tracer used to create producer spans
                 */
                public TracingProducerFactory(ProducerFactory<K, V> delegate, Tracer tracer) {
                    this.delegate = delegate;
                    this.tracer = tracer;
                }

                @Override
                public Producer<K, V> createProducer() {
                    return new TracingProducer<>(delegate.createProducer(), tracer);
                }

                /**
                 * Producer wrapper that injects the trace context before delegating.
                 * NOTE(review): only the two send(..) methods are shown; the remaining
                 * Producer methods still need to delegate.
                 */
                private static class TracingProducer<K, V> implements Producer<K, V> {
                    private final Producer<K, V> delegate;
                    private final Tracer tracer;

                    public TracingProducer(Producer<K, V> delegate, Tracer tracer) {
                        this.delegate = delegate;
                        this.tracer = tracer;
                    }

                    @Override
                    public Future<RecordMetadata> send(ProducerRecord<K, V> record) {
                        return delegate.send(injectTraceContext(record));
                    }

                    @Override
                    public Future<RecordMetadata> send(ProducerRecord<K, V> record, Callback callback) {
                        return delegate.send(injectTraceContext(record), callback);
                    }

                    /**
                     * Opens a PRODUCER span under the current span and adds its B3 headers
                     * to the record. The record is returned untouched when there is no
                     * current span or it is unsampled.
                     */
                    private ProducerRecord<K, V> injectTraceContext(ProducerRecord<K, V> record) {
                        Span currentSpan = tracer.getTraceContext().getCurrentSpan();
                        if (currentSpan == null || !currentSpan.isSampled()) {
                            return record;
                        }

                        // Span is a Lombok @Data class, so it is configured through its setters.
                        Span producerSpan = tracer.nextSpan(currentSpan);
                        producerSpan.setName("kafka:produce:" + record.topic());
                        producerSpan.setKind(Span.Kind.PRODUCER);
                        producerSpan.setTimestamp(System.currentTimeMillis());

                        // "ms" = message sent
                        producerSpan.annotation(new Span.Annotation(System.currentTimeMillis(), "ms"));
                        producerSpan.tag("messaging.system", "kafka");
                        producerSpan.tag("messaging.destination", record.topic());
                        producerSpan.tag("messaging.destination_kind", "topic");

                        Headers headers = record.headers();
                        addHeader(headers, "X-B3-TraceId", producerSpan.getTraceId());
                        addHeader(headers, "X-B3-SpanId", producerSpan.getSpanId());
                        addHeader(headers, "X-B3-ParentSpanId", currentSpan.getSpanId());
                        addHeader(headers, "X-B3-Sampled", "1");
                        // Custom header naming the sending service.
                        addHeader(headers, "X-Trace-Service", "user-service");

                        log.debug("Kafka消息追踪 - 发送: topic={}, traceId={}",
                                record.topic(), producerSpan.getTraceId());

                        // The send is asynchronous, so the span is closed right away.
                        tracer.close(producerSpan);
                        return record;
                    }

                    /** Adds a header encoded as UTF-8, independent of the platform default charset. */
                    private static void addHeader(Headers headers, String key, String value) {
                        headers.add(key, value.getBytes(java.nio.charset.StandardCharsets.UTF_8));
                    }
                }
            }

六、SkyWalking vs Jaeger 对比

功能对比分析

三大追踪系统对比

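| 特性 | Spring Cloud Sleuth + Zipkin | SkyWalking | Jaeger |
| --- | --- | --- | --- |
| 架构模式 | 客户端-服务器 | 探针-收集器 | 客户端-收集器 |
| 数据存储 | ES, MySQL, Cassandra | ES, H2, MySQL, TiDB | Cassandra, ES, Kafka |
| UI界面 | Zipkin UI | SkyWalking UI | Jaeger UI |
| 语言支持 | Java为主,多语言支持 | 多语言探针 | 多语言客户端 |
| 性能开销 | 中等低-中等(原文此行的列分隔已丢失,各列取值待核对) | | |
| 安装部署 | 简单 | 中等 | 简单 |
| 生态系统 | Spring Cloud生态 | Apache项目 | CNCF项目 |
| 监控维度 | 调用链、延迟 | 拓扑图、指标、追踪 | 分布式追踪 |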

迁移到 SkyWalking

SkyWalking 配置示例

# agent.config
# Service name
agent.service_name=${SW_AGENT_NAME:user-service}
# Backend (collector) address
collector.backend_service=${SW_AGENT_COLLECTOR:127.0.0.1:11800}
# Sampling
agent.sample_n_per_3_secs=${SW_AGENT_SAMPLE:-1}
# Suffixes to ignore
agent.ignore_suffix=${SW_AGENT_IGNORE_SUFFIX:.jpg,.jpeg,.png,.gif,.css,.js}
# Cross-process propagation
agent.cross_process_propagation_config=${SW_AGENT_CROSS_PROPAGATION:true}

七、生产环境最佳实践

生产级配置

Sleuth + Zipkin 生产配置

# NOTE(review): nesting was lost in the original and is restored from the comment
# grouping; confirm the hierarchy and key names against the Sleuth/Zipkin reference.
spring:
  sleuth:
    # Sampling
    sampler:
      probability: 0.1  # 10% sampling suggested for production
    # Log correlation
    log:
      slf4j:
        whitelist-mdc-keys: traceId,spanId,parentSpanId
    # Async
    async:
      enabled: true
      configurer:
        enabled: true
    # Scheduled tasks
    schedule:
      enabled: true
      skip-pattern: healthCheckTask
    # Messaging
    messaging:
      enabled: true
      rabbit:
        enabled: true
      kafka:
        enabled: true
  zipkin:
    # Production Zipkin cluster
    base-url: http://zipkin-cluster:9411
    # Sender settings
    sender:
      type: web
    # Compression enabled
    compression:
      enabled: true
    # Connection pool
    rest-template:
      max-total-connections: 100
      max-per-route: 20
      connection-timeout: 5000
      read-timeout: 10000
# Log pattern
logging:
  pattern:
    level: "%5p [${spring.application.name:-},%X{traceId:-},%X{spanId:-}]"
  level:
    org.springframework.cloud.sleuth: INFO
    brave: WARN
    zipkin2: WARN
# Monitoring
management:
  endpoints:
    web:
      exposure:
        include: health,metrics,prometheus,sleuth
  endpoint:
    sleuth:
      enabled: true
  metrics:
    export:
      zipkin:
        enabled: true
        step: 1m

性能优化建议

高并发场景优化

/**
 * Tracing configuration aimed at high-throughput services: asynchronous and
 * batched span reporting plus a sampler that adapts its rate.
 */
@Configuration
@Slf4j
public class HighPerformanceTracingConfig {

    /**
     * Asynchronous reporter, so that reporting does not block business threads.
     */
    @Bean
    @Primary
    public SpanReporter asyncSpanReporter(SpanReporter delegate) {
        return new AsyncSpanReporter(delegate);
    }

    /**
     * Batching reporter, to reduce the number of network requests.
     */
    @Bean
    public SpanReporter batchingSpanReporter(SpanReporter delegate) {
        return BatchingSpanReporter.wrap(delegate)
                .maxBatchSize(100)                     // largest batch
                .maxConcurrentBatches(5)               // batches in flight
                .batchInterval(Duration.ofSeconds(5))  // flush interval
                .build();
    }

    /**
     * Adaptive sampler: 1% base rate, 50% ceiling, re-evaluated every 1000 requests.
     */
    @Bean
    public Sampler adaptiveSampler() {
        // AdaptiveSampler only offers the three-argument constructor.
        return new AdaptiveSampler(0.01, 0.5, 1000);
    }

    /**
     * Probabilistic sampler that re-tunes its probability once per window of
     * requests, based on the share of requests actually sampled in that window.
     */
    @Slf4j
    public static class AdaptiveSampler implements Sampler {
        private final double baseProbability;
        private final double maxProbability;
        private final int windowSize;
        private final AtomicInteger requestCount = new AtomicInteger(0);
        private final AtomicInteger sampleCount = new AtomicInteger(0);
        private volatile double currentProbability;

        /**
         * @param baseProbability starting probability and lower bound, in [0, 1]
         * @param maxProbability  upper bound, in [baseProbability, 1]
         * @param windowSize      number of requests between adjustments; must be positive
         * @throws IllegalArgumentException if an argument is outside those ranges
         */
        public AdaptiveSampler(double baseProbability, double maxProbability, int windowSize) {
            if (windowSize <= 0) {
                throw new IllegalArgumentException("windowSize must be positive: " + windowSize);
            }
            if (!(baseProbability >= 0.0 && baseProbability <= maxProbability
                    && maxProbability <= 1.0)) {
                throw new IllegalArgumentException(
                        "expected 0 <= baseProbability <= maxProbability <= 1, got "
                                + baseProbability + " and " + maxProbability);
            }
            this.baseProbability = baseProbability;
            this.maxProbability = maxProbability;
            this.windowSize = windowSize;
            this.currentProbability = baseProbability;
        }

        /**
         * Makes one sampling decision and, at each window boundary, re-tunes the probability.
         */
        @Override
        public boolean isSampled() {
            boolean sampled =
                    java.util.concurrent.ThreadLocalRandom.current().nextDouble() < currentProbability;
            if (sampled) {
                // Without this count the measured rate stays at zero and the
                // probability can only ever rise to the maximum.
                sampleCount.incrementAndGet();
            }
            if (requestCount.incrementAndGet() % windowSize == 0) {
                adjustSamplingRate();
            }
            return sampled;
        }

        /**
         * Raises the probability by 20% (capped at the maximum) when the window's
         * sampled share is below half the base rate, lowers it by 20% (floored at
         * the base rate) when above one and a half times the base rate, then
         * starts a new window.
         */
        private void adjustSamplingRate() {
            int sampled = sampleCount.getAndSet(0);
            requestCount.set(0);
            double actualRate = (double) sampled / windowSize;

            if (actualRate < baseProbability * 0.5) {
                currentProbability = Math.min(currentProbability * 1.2, maxProbability);
            } else if (actualRate > baseProbability * 1.5) {
                currentProbability = Math.max(currentProbability * 0.8, baseProbability);
            }
            log.info("调整采样率: {} -> {}", actualRate, currentProbability);
        }
    }
}

洞察:分布式追踪是微服务可观测性的核心支柱。合理的采样策略、高效的数据收集和智能的上下文传播,是构建生产级追踪系统的关键。理解数据流和性能影响,才能在业务需求和系统开销之间找到最佳平衡。


如果觉得本文对你有帮助,请点击 点赞 + ⭐ 收藏 + 留言支持!

讨论话题

  1. 你在生产环境中使用哪种分布式追踪方案?有什么经验教训?
  2. 如何设计合理的采样策略来平衡性能和可观测性?
  3. 在跨语言微服务架构中如何实现完整的链路追踪?

相关资源推荐

  • https://spring.io/projects/spring-cloud-sleuth
  • https://zipkin.io/
  • https://github.com/example/sleuth-zipkin-demo
posted @ 2025-12-10 19:11  gccbuaa  阅读(13)  评论(0)    收藏  举报