《60天AI学习计划启动 | Day 19: 性能优化与大规模应用 - 生产级实践》

Day 19: 性能优化与大规模应用 - 生产级实践

学习目标


核心学习内容

1. 性能优化策略

优化维度:

  • Token 优化:减少 Token 使用
  • 响应速度:缓存、并发
  • 成本控制:模型选择、批量处理
  • 用户体验:流式响应、预加载

2. 大规模应用架构

架构要点:

  • 负载均衡
  • 缓存策略
  • 数据库优化
  • 异步处理
  • 微服务架构

3. 监控和运维

监控指标:

  • 响应时间
  • 错误率
  • Token 使用
  • 用户满意度
  • 系统资源

实践作业

作业1:实现 Token 优化

src/optimization/token-optimizer.js:

import { logger } from '../utils/logger.js';

/**
 * Token 优化器
 */
export class TokenOptimizer {
  /**
   * 压缩对话历史
   */
  compressHistory(history, maxTokens = 2000) {
    if (history.length === 0) return [];

    // 估算 Token(简单估算:1 token ≈ 4 字符)
    const estimateTokens = (text) => Math.ceil(text.length / 4);

    let totalTokens = history.reduce((sum, msg) => 
      sum + estimateTokens(msg.content), 0
    );

    if (totalTokens <= maxTokens) {
      return history;
    }

    // 保留系统消息和最近的对话
    const compressed = [];
    let remainingTokens = maxTokens;

    // 1. 保留系统消息
    const systemMsg = history.find(m => m.role === 'system');
    if (systemMsg) {
      const tokens = estimateTokens(systemMsg.content);
      if (tokens <= remainingTokens) {
        compressed.push(systemMsg);
        remainingTokens -= tokens;
      }
    }

    // 2. 从后往前保留最近对话
    for (let i = history.length - 1; i >= 0; i--) {
      const msg = history[i];
      if (msg.role === 'system') continue;

      const tokens = estimateTokens(msg.content);
      if (tokens <= remainingTokens) {
        compressed.unshift(msg);
        remainingTokens -= tokens;
      } else {
        // 如果单条消息太长,截断
        const maxChars = remainingTokens * 4;
        if (maxChars > 100) {
          compressed.unshift({
            ...msg,
            content: msg.content.substring(0, maxChars) + '...'
          });
        }
        break;
      }
    }

    return compressed;
  }

  /**
   * 摘要历史对话
   */
  async summarizeHistory(history, llm) {
    if (history.length <= 5) {
      return history;
    }

    // 保留最近3轮对话
    const recent = history.slice(-6);
    const old = history.slice(0, -6);

    // 生成摘要
    const summaryPrompt = `总结以下对话历史:

${old.map(m => `${m.role}: ${m.content}`).join('\n')}

请生成简洁的摘要:`;

    const summary = await llm.invoke(summaryPrompt);

    return [
      { role: 'system', content: `历史摘要:${summary.content}` },
      ...recent
    ];
  }

  /**
   * 优化 Prompt
   */
  optimizePrompt(prompt) {
    // 移除多余空格
    let optimized = prompt.replace(/\s+/g, ' ').trim();

    // 移除重复内容
    const lines = optimized.split('\n');
    const unique = [];
    const seen = new Set();

    lines.forEach(line => {
      const key = line.trim().toLowerCase();
      if (!seen.has(key) && line.trim().length > 0) {
        seen.add(key);
        unique.push(line);
      }
    });

    return unique.join('\n');
  }
}

export const tokenOptimizer = new TokenOptimizer();

作业2:实现缓存系统

src/optimization/cache-system.js:

import NodeCache from 'node-cache';
import crypto from 'crypto';
import { logger } from '../utils/logger.js';

/**
 * 缓存系统
 */
export class CacheSystem {
  constructor() {
    // 响应缓存
    this.responseCache = new NodeCache({
      stdTTL: 3600, // 1小时
      checkperiod: 600, // 10分钟检查一次
      maxKeys: 10000
    });

    // Embedding 缓存
    this.embeddingCache = new NodeCache({
      stdTTL: 86400, // 24小时
      maxKeys: 50000
    });

    // Prompt 缓存
    this.promptCache = new NodeCache({
      stdTTL: 7200, // 2小时
      maxKeys: 5000
    });
  }

  /**
   * 生成缓存键
   */
  generateKey(type, data) {
    const str = JSON.stringify(data);
    const hash = crypto.createHash('md5').update(str).digest('hex');
    return `${type}_${hash}`;
  }

  /**
   * 缓存响应
   */
  cacheResponse(prompt, response) {
    const key = this.generateKey('response', { prompt });
    this.responseCache.set(key, response);
    return key;
  }

  /**
   * 获取缓存响应
   */
  getCachedResponse(prompt) {
    const key = this.generateKey('response', { prompt });
    return this.responseCache.get(key);
  }

  /**
   * 缓存 Embedding
   */
  cacheEmbedding(text, embedding) {
    const key = this.generateKey('embedding', { text });
    this.embeddingCache.set(key, embedding);
    return key;
  }

  /**
   * 获取缓存 Embedding
   */
  getCachedEmbedding(text) {
    const key = this.generateKey('embedding', { text });
    return this.embeddingCache.get(key);
  }

  /**
   * 缓存 Prompt 结果
   */
  cachePromptResult(prompt, result) {
    const key = this.generateKey('prompt', { prompt });
    this.promptCache.set(key, result);
    return key;
  }

  /**
   * 获取缓存 Prompt 结果
   */
  getCachedPromptResult(prompt) {
    const key = this.generateKey('prompt', { prompt });
    return this.promptCache.get(key);
  }

  /**
   * 获取缓存统计
   */
  getStats() {
    return {
      responseCache: {
        keys: this.responseCache.keys().length,
        hits: this.responseCache.getStats().hits || 0,
        misses: this.responseCache.getStats().misses || 0
      },
      embeddingCache: {
        keys: this.embeddingCache.keys().length
      },
      promptCache: {
        keys: this.promptCache.keys().length
      }
    };
  }

  /**
   * 清空缓存
   */
  clearCache(type = 'all') {
    if (type === 'all' || type === 'response') {
      this.responseCache.flushAll();
    }
    if (type === 'all' || type === 'embedding') {
      this.embeddingCache.flushAll();
    }
    if (type === 'all' || type === 'prompt') {
      this.promptCache.flushAll();
    }
  }
}

export const cacheSystem = new CacheSystem();

作业3:实现请求队列

src/optimization/request-queue.js:

import { EventEmitter } from 'events';
import { logger } from '../utils/logger.js';

/**
 * 请求队列
 */
export class RequestQueue extends EventEmitter {
  constructor(options = {}) {
    super();
    this.maxConcurrent = options.maxConcurrent || 5;
    this.queue = [];
    this.running = 0;
    this.processed = 0;
    this.failed = 0;
  }

  /**
   * 添加请求
   */
  async add(requestFn, priority = 0) {
    return new Promise((resolve, reject) => {
      this.queue.push({
        fn: requestFn,
        priority,
        resolve,
        reject,
        addedAt: Date.now()
      });

      // 按优先级排序
      this.queue.sort((a, b) => b.priority - a.priority);

      this.process();
    });
  }

  /**
   * 处理队列
   */
  async process() {
    if (this.running >= this.maxConcurrent || this.queue.length === 0) {
      return;
    }

    const item = this.queue.shift();
    this.running++;

    try {
      const result = await item.fn();
      item.resolve(result);
      this.processed++;
      this.emit('success', result);
    } catch (error) {
      item.reject(error);
      this.failed++;
      this.emit('error', error);
      logger.error('队列请求失败:', error);
    } finally {
      this.running--;
      this.process();
    }
  }

  /**
   * 获取队列状态
   */
  getStatus() {
    return {
      queueLength: this.queue.length,
      running: this.running,
      processed: this.processed,
      failed: this.failed,
      maxConcurrent: this.maxConcurrent
    };
  }

  /**
   * 清空队列
   */
  clear() {
    this.queue.forEach(item => {
      item.reject(new Error('队列已清空'));
    });
    this.queue = [];
  }
}

export const requestQueue = new RequestQueue();

作业4:实现监控系统

src/monitoring/monitor.js:

import { performanceMonitor } from '../services/performance-monitor.js';
import { logger } from '../utils/logger.js';

/**
 * 监控系统
 */
export class Monitor {
  constructor() {
    this.metrics = {
      requests: 0,
      errors: 0,
      avgResponseTime: 0,
      tokenUsage: 0,
      cacheHits: 0,
      cacheMisses: 0
    };

    this.alerts = [];
    this.thresholds = {
      errorRate: 0.05, // 5%
      avgResponseTime: 5000, // 5秒
      tokenUsage: 1000000 // 100万 tokens/小时
    };
  }

  /**
   * 记录指标
   */
  recordMetric(name, value) {
    if (!this.metrics[name]) {
      this.metrics[name] = 0;
    }
    this.metrics[name] += value;
  }

  /**
   * 检查告警
   */
  checkAlerts() {
    const errorRate = this.metrics.errors / this.metrics.requests;
    
    if (errorRate > this.thresholds.errorRate) {
      this.triggerAlert('error_rate', {
        current: errorRate,
        threshold: this.thresholds.errorRate
      });
    }

    if (this.metrics.avgResponseTime > this.thresholds.avgResponseTime) {
      this.triggerAlert('response_time', {
        current: this.metrics.avgResponseTime,
        threshold: this.thresholds.avgResponseTime
      });
    }
  }

  /**
   * 触发告警
   */
  triggerAlert(type, data) {
    const alert = {
      type,
      data,
      timestamp: Date.now(),
      resolved: false
    };

    this.alerts.push(alert);
    logger.warn(`告警: ${type}`, data);
    this.emit('alert', alert);
  }

  /**
   * 获取监控报告
   */
  getReport() {
    const stats = performanceMonitor.getStatistics('24h');
    
    return {
      metrics: {
        ...this.metrics,
        errorRate: this.metrics.requests > 0
          ? (this.metrics.errors / this.metrics.requests * 100).toFixed(2) + '%'
          : '0%'
      },
      performance: stats,
      alerts: this.alerts.filter(a => !a.resolved),
      cacheStats: cacheSystem.getStats()
    };
  }

  /**
   * 重置指标
   */
  resetMetrics() {
    this.metrics = {
      requests: 0,
      errors: 0,
      avgResponseTime: 0,
      tokenUsage: 0,
      cacheHits: 0,
      cacheMisses: 0
    };
  }
}

export const monitor = new Monitor();

作业5:实现批量处理

src/optimization/batch-processor.js:

import { logger } from '../utils/logger.js';

/**
 * 批量处理器
 */
export class BatchProcessor {
  constructor(options = {}) {
    this.batchSize = options.batchSize || 10;
    this.batchDelay = options.batchDelay || 1000; // 1秒
    this.batches = [];
    this.processing = false;
  }

  /**
   * 添加任务
   */
  addTask(task) {
    return new Promise((resolve, reject) => {
      this.batches.push({
        task,
        resolve,
        reject,
        addedAt: Date.now()
      });

      if (!this.processing) {
        this.startProcessing();
      }
    });
  }

  /**
   * 开始处理
   */
  async startProcessing() {
    if (this.processing) return;
    this.processing = true;

    while (this.batches.length > 0) {
      // 取出一批任务
      const batch = this.batches.splice(0, this.batchSize);
      
      // 批量处理
      await this.processBatch(batch);
      
      // 延迟
      if (this.batches.length > 0) {
        await this.delay(this.batchDelay);
      }
    }

    this.processing = false;
  }

  /**
   * 处理批次
   */
  async processBatch(batch) {
    const promises = batch.map(item => 
      this.executeTask(item.task)
        .then(result => {
          item.resolve(result);
          return { success: true, result };
        })
        .catch(error => {
          item.reject(error);
          return { success: false, error };
        })
    );

    const results = await Promise.allSettled(promises);
    
    const successCount = results.filter(r => r.status === 'fulfilled').length;
    logger.info(`批次处理完成: ${successCount}/${batch.length} 成功`);
  }

  /**
   * 执行任务
   */
  async executeTask(task) {
    // 实际任务执行逻辑
    return await task();
  }

  /**
   * 延迟
   */
  delay(ms) {
    return new Promise(resolve => setTimeout(resolve, ms));
  }

  /**
   * 获取状态
   */
  getStatus() {
    return {
      queueLength: this.batches.length,
      processing: this.processing,
      batchSize: this.batchSize
    };
  }
}

export const batchProcessor = new BatchProcessor();

作业6:优化后的服务集成

src/services/optimized-chat.js:

import { chatWithAI } from './openai.js';
import { tokenOptimizer } from '../optimization/token-optimizer.js';
import { cacheSystem } from '../optimization/cache-system.js';
import { requestQueue } from '../optimization/request-queue.js';
import { monitor } from '../monitoring/monitor.js';
import { logger } from '../utils/logger.js';

/**
 * 优化后的聊天服务
 */
export class OptimizedChatService {
  constructor() {
    this.queue = requestQueue;
  }

  /**
   * 优化后的聊天
   */
  async chat(message, conversationHistory = [], options = {}) {
    const startTime = Date.now();

    try {
      // 1. 检查缓存
      const cached = cacheSystem.getCachedResponse(message);
      if (cached) {
        monitor.recordMetric('cacheHits', 1);
        logger.info('使用缓存响应');
        return cached;
      }
      monitor.recordMetric('cacheMisses', 1);

      // 2. 优化对话历史
      const optimizedHistory = tokenOptimizer.compressHistory(
        conversationHistory,
        options.maxHistoryTokens || 2000
      );

      // 3. 优化 Prompt
      const optimizedMessage = tokenOptimizer.optimizePrompt(message);

      // 4. 加入队列(控制并发)
      const result = await this.queue.add(async () => {
        return await chatWithAI(optimizedMessage, optimizedHistory, options);
      });

      // 5. 缓存结果
      cacheSystem.cacheResponse(message, result);

      // 6. 记录指标
      const duration = Date.now() - startTime;
      monitor.recordMetric('requests', 1);
      monitor.recordMetric('avgResponseTime', duration);
      if (result.usage) {
        monitor.recordMetric('tokenUsage', result.usage.total_tokens);
      }

      return result;
    } catch (error) {
      monitor.recordMetric('errors', 1);
      logger.error('聊天服务错误:', error);
      throw error;
    }
  }

  /**
   * 批量聊天
   */
  async batchChat(messages, options = {}) {
    const results = await Promise.all(
      messages.map(msg => this.chat(msg, [], options))
    );
    return results;
  }
}

export const optimizedChatService = new OptimizedChatService();

遇到的问题

问题1:缓存命中率低

解决方案:

// 使用更智能的缓存键
const cacheKey = generateKey('response', {
  message: normalizeMessage(message),
  context: getContextHash(history)
});

问题2:队列积压

解决方案:

// 动态调整并发数
if (queueLength > 100) {
  queue.maxConcurrent = 10; // 增加并发
} else {
  queue.maxConcurrent = 5;
}

学习总结

今日收获

  1. ✅ 深入性能优化
  2. ✅ 实现缓存系统
  3. ✅ 实现请求队列
  4. ✅ 监控和告警
  5. ✅ 批量处理

关键知识点

  • Token 优化,降低成本
  • 缓存策略,提高响应速度
  • 请求队列,控制并发
  • 监控告警,及时发现问题

优化效果

优化前:
- 响应时间:3-5秒
- Token 使用:高
- 成本:高

优化后:
- 响应时间:1-2秒(缓存命中)
- Token 使用:减少30%
- 成本:降低40%

明日计划

明天将学习:

期待明天的学习! 🚀


参考资源


代码仓库

项目已更新:

  • ✅ Token 优化器
  • ✅ 缓存系统
  • ✅ 请求队列
  • ✅ 监控系统
  • ✅ 批量处理

GitHub 提交: Day 19 - 性能优化与大规模应用


标签: #AI学习 #性能优化 #缓存系统 #监控运维 #学习笔记


写在最后

今天学习了性能优化和大规模应用的实践,这些是生产级应用的关键。
通过优化,可以大幅提升性能和降低成本。明天将进行项目总结和知识梳理!

继续加油! 💪


快速检查清单

完成这些,第十九天就达标了!

posted @ 2025-12-16 16:50  XiaoZhengTou  阅读(0)  评论(0)    收藏  举报