AI Agent 30天速成|Day9 笔记
AI Agent 全日制30天速成|Day9 笔记
修订要点
- 全部能力工具化:文档入库、文本Embedding、知识库检索、数学计算全部注册为标准Function工具,ReAct自主调用,无硬编码业务逻辑
- 向量存储替换为 Chroma持久向量库,彻底移除内存列表/FAISS,支持磁盘持久、元数据过滤
- 工具嵌套执行:rag_search、vector_add底层自动调用text_embedding工具生成语义向量
- 整套代码完全独立,不依赖之前代码;限流/熔断/权限/日志对所有工具统一生效
- 所有向量生成统一调用厂商Embedding接口,不使用本地字符假向量
今日总学习目标
- 实现全工具化架构:向量入库、向量化、检索、计算均为可调用工具
- 基于Chroma本地持久向量库完成知识库增删查,文档入库也走工具调用
- 掌握工具嵌套执行逻辑,上层检索工具自动调用底层Embedding工具
- 统一中间件体系对四类工具全覆盖,分层超时隔离Embedding/LLM请求
- 独立运行整套ReAct Agent,无任何前置课程代码依赖
每日时长分配(全天8h)
- 理论笔记阅读:2.5h
- 分层代码编写调试:4h
- 复盘面试背诵:1.5h
一、核心理论教学笔记
1 全工具化分层设计
底层基础工具(依赖Embedding接口)
text_embedding:批量文本转语义向量,所有向量操作唯一底层依赖
业务向量工具(嵌套底层embedding)
vector_add:新增知识库文档,自动向量化存入Chroma
rag_search:检索知识库,自动生成查询向量,Chroma相似度召回
数值工具
calculator:四则数学运算
所有工具全部注册到统一网关,权限、限流、熔断、日志一套逻辑复用
2 Chroma向量库优势(对比内存/FAISS)
- 磁盘持久化:程序重启向量与文档不丢失
- 原生元数据支持:可标记文档来源、分类,检索过滤
- 开箱即用,无需手动维护向量id映射字典
- 内置相似度排序,支持MMR去重检索
- 同步API简单适配异步业务
3 完整工具化执行链路
用户提问
- 接口生成全局TraceID,执行输入安全校验、令牌桶限流
- 读取Redis会话记忆,自动滑动窗口+摘要压缩
- ReAct循环推理:模型自主选择工具
  - 数学题 → calculator
  - 新增资料 → vector_add(内部调用text_embedding存入Chroma)
  - 查询知识 → rag_search(内部调用text_embedding生成查询向量,Chroma召回)
- 网关统一拦截:权限校验 → 熔断判断 → 指数退避重试 → 独立超时执行
- 工具结果回填对话上下文,反思判断信息是否充足
- 汇总全部观测结果,LLM生成脱敏最终回答
- 对话持久存入Redis,全链路日志落地
4 生产中间件全覆盖规则
- LLM全局60s超时;Embedding独立15s超时
- 令牌桶统一管控所有工具请求,削峰防429限流
- 单工具连续失败触发熔断,冷却后试探恢复
- 访客仅可闲聊;普通用户可用计算、检索;管理员可批量入库文档
- 每条工具调用携带TraceID写入日志,记录耗时、参数、异常
二、今日学习重点
- 将文档入库、向量化、检索全部封装为可调用工具,实现纯工具驱动Agent
- Chroma持久向量库接入项目,上层工具无感知读写向量库
- 实现工具嵌套调用,上层检索/入库工具自动调用Embedding底层工具
- 安全、限流、熔断、日志中间件对四类工具统一生效
- 独立可运行完整项目,无Day1~Day8代码依赖
三、今日难点 & 解决方案
难点1:向量相关逻辑散落在代码各处,无法监控限流
解决方案:全部封装标准Function,统一网关调度,所有向量操作都经过中间件拦截
难点2:程序重启知识库丢失
解决方案:使用Chroma持久客户端,向量数据落地本地文件夹
难点3:Embedding高频调用频繁触发平台限流
解决方案:Embedding工具独立超时,令牌桶管控QPS,熔断拦截连续失败请求
难点4:模型不会自主执行文档入库/检索
解决方案:System提示词完整列出全部工具名称与用途,搭配少样本示例
四、完整可运行代码
依赖安装
pip install aiohttp pydantic fastapi uvicorn aioredis python-dotenv chromadb numpy
项目目录
day9_full_tool_agent/
├── .env # 环境配置
├── middleware.py # 限流/熔断/重试/日志
├── security.py # 注入防护、敏感脱敏、权限
├── llm_client.py # LLM+Embedding异步客户端
├── tool_gateway.py # 全量工具注册(Chroma向量工具+计算器)
├── memory_store.py # Redis分层持久记忆
├── agent_core.py # ReAct主智能体
└── main.py # FastAPI入口
1 .env 配置文件
# LLM & Embedding
LLM_BASE_URL=https://dashscope.aliyuncs.com/compatible-mode/v1/chat/completions
LLM_EMBED_URL=https://dashscope.aliyuncs.com/compatible-mode/v1/embeddings
LLM_API_KEY=你的API_KEY
# Redis
REDIS_URL=redis://127.0.0.1:6379
# 限流熔断
TOKEN_BUCKET_CAP=12
TOKEN_RATE=3
MAX_FAIL_TIMES=3
COOLDOWN_SECONDS=10
# 开关
ENABLE_SAFE_FILTER=true
ENABLE_LOG=true
# Chroma持久化路径
CHROMA_PERSIST_PATH=./chroma_kb
CHROMA_COLLECTION_NAME=agent_kb
2 middleware.py
import asyncio
import time
import json
import os
import uuid
from dotenv import load_dotenv
load_dotenv()
# 令牌桶限流
class TokenBucket:
    """Token-bucket rate limiter.

    The bucket starts full with ``cap`` tokens and is topped up lazily, at
    ``rate`` tokens per second, every time a token is requested.
    """

    def __init__(self, cap: int, rate: float):
        self.capacity = cap
        self.rate = rate
        self.token_num = cap
        # Monotonic clock: a wall-clock adjustment (NTP step, manual change)
        # must never produce a negative interval that would drain the bucket.
        self.last_refill_time = time.monotonic()

    def refill(self):
        """Credit the tokens accrued since the last refill, capped at capacity."""
        now = time.monotonic()
        elapsed = max(0.0, now - self.last_refill_time)
        self.token_num = min(self.capacity, self.token_num + elapsed * self.rate)
        self.last_refill_time = now

    async def get_token(self) -> bool:
        """Take one token if available.

        Returns:
            True when a token was consumed, False when the caller should be
            rejected (the method never waits).
        """
        self.refill()
        if self.token_num >= 1:
            self.token_num -= 1
            return True
        return False


# Process-wide limiter. The fallbacks mirror the sample .env so that a missing
# variable no longer crashes the import with int(None).
global_bucket = TokenBucket(
    int(os.getenv("TOKEN_BUCKET_CAP", "12")),
    float(os.getenv("TOKEN_RATE", "3")),
)
# 熔断降级
class CircuitBreaker:
    """Circuit breaker with ``closed`` / ``open`` / ``half_open`` states.

    After ``max_fail`` consecutive failures the breaker opens and rejects
    calls. Once ``cool_sec`` seconds have passed it moves to ``half_open`` and
    lets a probe call through; a success closes it, a failure re-opens it.
    """

    def __init__(self, max_fail: int, cool_sec: int):
        self.max_fail = max_fail
        self.cool = cool_sec
        self.fail_count = 0
        self.state = "closed"
        # Monotonic timestamp of the moment the breaker last opened.
        self.open_start = 0.0

    async def can_run(self) -> bool:
        """Return True if a call may proceed, False while the breaker is open."""
        if self.state != "open":
            return True
        if time.monotonic() - self.open_start > self.cool:
            # Cool-down elapsed: allow a probe call.
            self.state = "half_open"
            return True
        return False

    def success(self):
        """Record a successful call: reset the failure counter and close."""
        self.fail_count = 0
        self.state = "closed"

    def fail(self):
        """Record a failed call and open the breaker once the limit is reached."""
        self.fail_count += 1
        if self.fail_count >= self.max_fail and self.state != "open":
            self.state = "open"
            # The original stored the function object `time.time` (no call),
            # which made can_run() raise TypeError as soon as the breaker opened.
            self.open_start = time.monotonic()
# One breaker per tool name, created lazily on first use.
breaker_map = {}


def get_breaker(tool_name: str):
    """Return the circuit breaker for ``tool_name``, creating it on first use.

    Thresholds come from MAX_FAIL_TIMES / COOLDOWN_SECONDS. The fallbacks
    mirror the sample .env so an unset variable does not raise on int(None).
    """
    if tool_name not in breaker_map:
        breaker_map[tool_name] = CircuitBreaker(
            int(os.getenv("MAX_FAIL_TIMES", "3")),
            int(os.getenv("COOLDOWN_SECONDS", "10")),
        )
    return breaker_map[tool_name]
# 指数退避重试
async def backoff_retry(call_func, max_retry=3):
    """Await ``call_func()`` with exponential back-off between failed attempts.

    Up to ``max_retry`` attempts are made inside the loop, sleeping 1s, 2s,
    4s, ... (capped at 8s) after each failure, followed by one final attempt
    whose exception propagates. Errors whose text contains "401", "参数非法"
    or "权限不足" are re-raised immediately without retrying.
    """
    fatal_marks = ("401", "参数非法", "权限不足")
    wait = 1
    attempts_left = max_retry
    while attempts_left > 0:
        attempts_left -= 1
        try:
            return await call_func()
        except Exception as exc:
            message = str(exc)
            if any(mark in message for mark in fatal_marks):
                raise exc
            await asyncio.sleep(wait)
            wait = min(wait * 2, 8)
    # Last chance: whatever this raises goes straight to the caller.
    return await call_func()
# 日志埋点
class AgentLog:
    """JSON-lines trace logger that appends to ``./agent_trace.log``.

    Logging is active only when the ENABLE_LOG environment variable is
    exactly "true".
    """

    def __init__(self):
        self.log_file = "./agent_trace.log"
        self.switch = os.getenv("ENABLE_LOG") == "true"

    def write(self, trace_id: str, level: str, content: dict):
        """Append one record: time, trace_id and level merged with ``content``.

        Keys in ``content`` override the three standard fields when they
        collide. Does nothing when logging is switched off.
        """
        if self.switch:
            record = {
                "time": time.strftime("%Y-%m-%d %H:%M:%S"),
                "trace_id": trace_id,
                "level": level,
            }
            record.update(content)
            with open(self.log_file, "a", encoding="utf-8") as fh:
                fh.write(json.dumps(record, ensure_ascii=False) + "\n")


log_client = AgentLog()
def create_trace_id() -> str:
    """Return a fresh random UUID4 as its canonical hyphenated string."""
    trace_uuid = uuid.uuid4()
    return str(trace_uuid)
3 security.py
import re
import os
# Masking patterns, applied in list order. Longest / most specific first:
# the original ran the 11-digit phone rule first, which matched *inside* an
# 18-digit ID number and left the surrounding digits unmasked.
SENSITIVE_RULES = [
    # Exactly 18 characters: 17 digits plus a digit or X (presumably a resident
    # ID number). Digit boundaries keep it from eating part of a longer run.
    re.compile(r"(?<!\d)\d{17}[\dXx](?!\d)"),
    # Any run of 16-19 digits (presumably bank-card numbers).
    re.compile(r"\d{16,19}"),
    # 11 digits starting with 13-19 (mobile-phone shape).
    re.compile(r"1[3-9]\d{9}"),
]
INJECT_MARKS = ["```", '"""', "'''"]

# Minimum role required to call each tool.
TOOL_AUTH = {
    "calculator": "user",
    "text_embedding": "user",
    "rag_search": "user",
    "vector_add": "admin"
}


def escape_inject(text: str) -> str:
    """Break up every triple-quote / code-fence marker in ``text``.

    Each marker is shortened by one character. The replacement is repeated
    until the marker is gone: a single pass turns four backticks into three,
    i.e. it re-creates the very marker it was meant to remove.
    """
    for mark in INJECT_MARKS:
        shortened = mark[:-1]
        # Every pass strictly shortens the text, so the loop terminates.
        while mark in text:
            text = text.replace(mark, shortened)
    return text
def desensitize(text: str) -> str:
    """Replace every match of each SENSITIVE_RULES pattern with "******".

    The patterns are applied one after another, in list order, each to the
    output of the previous one.
    """
    masked = text
    for rule in SENSITIVE_RULES:
        masked = rule.sub("******", masked)
    return masked
def check_tool_auth(tool_name: str, user_role: str) -> bool:
    """Tell whether ``user_role`` may call ``tool_name``.

    Tools missing from TOOL_AUTH are treated as admin-only. A "user"-level
    tool is open to "user" and "admin"; an "admin"-level tool to "admin"
    only. Any other role or level is refused.
    """
    roles_allowed = {
        "user": ("user", "admin"),
        "admin": ("admin",),
    }
    required_level = TOOL_AUTH.get(tool_name, "admin")
    return user_role in roles_allowed.get(required_level, ())
async def input_verify(raw_input: str, max_len=800) -> tuple[bool, str]:
    """Validate and sanitise a user prompt.

    Returns:
        ``(False, message)`` when the input is longer than ``max_len``
        characters, otherwise ``(True, text)`` where ``text`` is the input
        passed through escape_inject().
    """
    within_limit = len(raw_input) <= max_len
    if within_limit:
        return True, escape_inject(raw_input)
    return False, "输入过长,请精简提问"
4 llm_client.py
import aiohttp
import asyncio
import json
import re
from typing import List, Dict, AsyncGenerator
from dotenv import load_dotenv
import os
load_dotenv()
class AsyncLLMClient:
    """Async HTTP client for the chat-completion and embedding endpoints.

    Endpoint URLs and the API key are read from the LLM_BASE_URL,
    LLM_EMBED_URL and LLM_API_KEY environment variables. Chat requests use a
    60-second total timeout, embedding requests a separate 15-second one.
    """

    def __init__(self):
        self.base_url = os.getenv("LLM_BASE_URL")
        self.embed_url = os.getenv("LLM_EMBED_URL")
        self.api_key = os.getenv("LLM_API_KEY")
        self.headers = {
            "Authorization": f"Bearer {self.api_key}",
            "Content-Type": "application/json"
        }
        self.timeout_llm = aiohttp.ClientTimeout(total=60)
        self.timeout_emb = aiohttp.ClientTimeout(total=15)

    async def _post_json(self, url, payload, timeout):
        """POST ``payload`` as JSON and return the decoded JSON response body.

        Raises:
            aiohttp.ClientResponseError: on a 4xx/5xx status. Its text carries
                the status code, so callers can tell 401 apart from transient
                failures instead of hitting a KeyError on the error body.
        """
        async with aiohttp.ClientSession(timeout=timeout) as session:
            # `async with` releases the connection even if decoding fails.
            async with session.post(url, json=payload, headers=self.headers) as resp:
                resp.raise_for_status()
                return await resp.json()

    async def chat_sync(self, messages: List[Dict], temperature=0.1, tools=None):
        """Send a non-streaming chat request and return the full response dict.

        Args:
            messages: chat messages in role/content form.
            temperature: sampling temperature.
            tools: optional function-tool schemas; sent only when truthy.
        """
        payload = {
            "model": "qwen-turbo",
            "messages": messages,
            "temperature": temperature,
            "stream": False
        }
        if tools:
            payload["tools"] = tools
        return await self._post_json(self.base_url, payload, self.timeout_llm)

    async def chat_stream(self, messages: List[Dict], temperature=0.1) -> AsyncGenerator[str, None]:
        """Stream a chat completion, yielding each non-empty content delta.

        The body is parsed as ``data:`` events separated by a blank line; the
        stream ends at the ``[DONE]`` event. Malformed events are skipped.
        """
        import codecs  # local import: only this method needs it

        payload = {
            "model": "qwen-turbo",
            "messages": messages,
            "temperature": temperature,
            "stream": True
        }
        # Incremental decoding: a multi-byte UTF-8 character may be split
        # across two network chunks, which makes a plain bytes.decode() raise.
        decoder = codecs.getincrementaldecoder("utf-8")()
        async with aiohttp.ClientSession(timeout=self.timeout_llm) as session:
            async with session.post(self.base_url, json=payload, headers=self.headers) as resp:
                resp.raise_for_status()
                buffer = ""
                async for chunk in resp.content.iter_chunked(1024):
                    buffer += decoder.decode(chunk)
                    while "data:" in buffer:
                        idx = buffer.find("data:")
                        end = buffer.find("\n\n", idx)
                        if end == -1:
                            # Incomplete event: wait for more bytes.
                            break
                        block = buffer[idx + 5:end].strip()
                        buffer = buffer[end + 2:]
                        if block == "[DONE]":
                            return
                        try:
                            item = json.loads(block)
                            delta = item["choices"][0]["delta"].get("content", "")
                        except (ValueError, LookupError, TypeError, AttributeError):
                            # Not a well-formed delta event; skip it.
                            continue
                        # The yield sits outside the try: the original bare
                        # `except:` also swallowed the GeneratorExit raised
                        # here when the consumer closed the generator early.
                        if delta:
                            yield delta

    async def batch_embedding(self, text_list: List[str]) -> List[List[float]]:
        """Embed ``text_list`` and return one vector per entry of the response.

        An empty list returns ``[]`` without calling the API.
        """
        if not text_list:
            return []
        payload = {
            "model": "text-embedding-v1",
            "input": text_list
        }
        res_data = await self._post_json(self.embed_url, payload, self.timeout_emb)
        return [item["embedding"] for item in res_data["data"]]

    async def chat_json(self, messages: List[Dict], schema):
        """Ask for a JSON-only answer and validate it against ``schema``.

        The JSON schema is appended to a copy of the last message, so the
        caller's ``messages`` stay untouched. The request is made at most
        twice; the first ``{...}`` span found in a reply is validated.

        Args:
            messages: non-empty chat history; the last entry receives the
                format instruction.
            schema: class exposing ``model_json_schema()`` and
                ``model_validate_json()``.

        Raises:
            ValueError: neither reply contained a JSON object.
        """
        prompt_ext = f"仅输出标准JSON,无多余文字,JSON规范:{schema.model_json_schema()}"
        # list.copy() is shallow: the original `+=` on the last element
        # modified the caller's own message dict.
        new_msg = list(messages[:-1])
        last_msg = dict(messages[-1])
        last_msg["content"] = last_msg["content"] + prompt_ext
        new_msg.append(last_msg)
        for _ in range(2):
            # Await first, subscript second: `await f(...)[...]` subscripts
            # the coroutine object and raises TypeError.
            resp = await self.chat_sync(new_msg, temperature=0.0)
            raw = resp["choices"][0]["message"]["content"] or ""
            match = re.search(r"\{.*\}", raw, re.S)
            if match:
                return schema.model_validate_json(match.group())
        raise ValueError("LLM reply contained no JSON object")
# Module-level client instance, imported by the other modules.
llm_client = AsyncLLMClient()
5 tool_gateway.py(核心:全向量操作工具化+Chroma)
import asyncio
import chromadb
import os
from dotenv import load_dotenv
import numpy as np
from pydantic import BaseModel, Field
from typing import List, Dict
from llm_client import llm_client
from middleware import get_breaker, backoff_retry, log_client
from security import check_tool_auth
load_dotenv()
# Chroma initialisation: on-disk persistent client plus one collection using
# cosine distance. The fallbacks mirror the sample .env so an unset variable
# does not hand None to chromadb.
CHROMA_PATH = os.getenv("CHROMA_PERSIST_PATH", "./chroma_kb")
COLL_NAME = os.getenv("CHROMA_COLLECTION_NAME", "agent_kb")
chroma_client = chromadb.PersistentClient(path=CHROMA_PATH)
coll = chroma_client.get_or_create_collection(name=COLL_NAME, metadata={"hnsw:space": "cosine"})
# ========== 全部工具参数模型 ==========
class CalcArgs(BaseModel):
    """Arguments of the calculator tool."""

    num1: float = Field(description="第一个数字")
    num2: float = Field(description="第二个数字")
    op: str = Field(description="运算符 +-*/")


class EmbeddingArgs(BaseModel):
    """Arguments of the text_embedding tool."""

    text_list: List[str] = Field(description="待向量化文本数组")


class VectorAddArgs(BaseModel):
    """Arguments of the vector_add tool."""

    text: str = Field(description="入库知识库文本")
    source: str = Field(default="default", description="文档来源标签")


class RagSearchArgs(BaseModel):
    """Arguments of the rag_search tool."""

    query: str = Field(description="检索问题")
    # ge=1 rejects zero/negative values at validation time instead of letting
    # them reach the vector store; the description makes the field visible to
    # the model in the generated tool schema.
    top_k: int = Field(default=3, ge=1, description="返回的文档片段数量,至少为1")
# ========== 底层工具函数 ==========
async def calculator(num1, num2, op):
    """Apply the binary operator ``op`` (+, -, *, /) to ``num1`` and ``num2``.

    Returns:
        A result string of the form "计算结果:<num1><op><num2>=<result>", or a
        message describing the failure (division by zero, unsupported
        operator, any other exception). This function never raises.
    """
    try:
        if op == "+":
            res = num1 + num2
        elif op == "-":
            res = num1 - num2
        elif op == "*":
            res = num1 * num2
        elif op == "/":
            if num2 == 0:
                return "计算失败:除数不能为0"
            res = num1 / num2
        else:
            return f"不支持运算符{op}"
        # The original format string left out num2 ("1+=3" instead of "1+2=3").
        return f"计算结果:{num1}{op}{num2}={res}"
    except Exception as e:
        return f"计算异常:{str(e)}"
# Base vector tool: calls the vendor Embedding endpoint.
async def text_embedding(text_list: List[str]):
    """Return one embedding vector per text via llm_client.batch_embedding.

    An empty list returns ``[]`` without spending an API call.
    """
    if not text_list:
        return []
    return await llm_client.batch_embedding(text_list)
# Ingestion tool: embeds the text, then stores it in Chroma.
async def vector_add(text: str, source: str = "default"):
    """Embed ``text`` and add it to the Chroma collection under a new UUID.

    Args:
        text: document text to store; blank text is rejected.
        source: value stored in the document's "source" metadata field.

    Returns:
        A confirmation string, or a failure message for blank text.
    """
    import uuid  # local import: this file's import block has no uuid

    if not text or not text.strip():
        return "入库失败:文本不能为空"
    vecs = await text_embedding([text])
    doc_id = str(uuid.uuid4())
    # coll.add is a synchronous call; run it in a worker thread so the event
    # loop keeps serving other requests while Chroma writes.
    await asyncio.to_thread(
        coll.add,
        ids=[doc_id],
        embeddings=vecs,
        documents=[text],
        metadatas=[{"source": source}],
    )
    return f"文档入库成功,来源:{source}"
# Retrieval tool: embeds the query, then queries Chroma.
async def rag_search(query: str, top_k: int = 3):
    """Return the ``top_k`` documents closest to ``query`` in the collection.

    Returns:
        One "文档片段:<text>" line per hit, a fixed message when nothing
        matched, or a failure message for a blank query.
    """
    if not query or not query.strip():
        return "检索失败:查询内容不能为空"
    q_vec = await text_embedding([query])
    # coll.query is a synchronous call; keep it off the event loop.
    res = await asyncio.to_thread(
        coll.query,
        query_embeddings=q_vec,
        n_results=top_k,
    )
    # Results are grouped per query vector; exactly one was sent.
    docs = res["documents"][0]
    if not docs:
        return "知识库未匹配到相关内容"
    return "\n".join(f"文档片段:{d}" for d in docs)
# ========== 统一工具网关 ==========
class ToolGateway:
    """Registry and single entry point for every tool the agent can call.

    run_tool() applies, in order: call logging, permission check,
    unknown-tool check, circuit-breaker check, argument validation, and
    execution with back-off retry.
    """

    def __init__(self):
        # name -> pydantic argument model + coroutine implementing the tool
        self.tool_registry = {
            "calculator": {"model": CalcArgs, "func": calculator},
            "text_embedding": {"model": EmbeddingArgs, "func": text_embedding},
            "vector_add": {"model": VectorAddArgs, "func": vector_add},
            "rag_search": {"model": RagSearchArgs, "func": rag_search}
        }

    def get_tools_schema(self) -> List[Dict]:
        """Build the function-calling ``tools`` list from the registry."""
        return [
            {
                "type": "function",
                "function": {
                    "name": name,
                    "description": f"{name}工具,完成对应能力",
                    "parameters": info["model"].model_json_schema()
                }
            }
            for name, info in self.tool_registry.items()
        ]

    async def run_tool(self, tool_name: str, raw_args: dict, trace_id: str, user_role: str):
        """Run one tool call through the middleware chain.

        Args:
            tool_name: registry key of the tool.
            raw_args: unvalidated arguments produced by the model.
            trace_id: request trace id written to every log record.
            user_role: caller role used for the permission check.

        Returns:
            The tool's own result on success, otherwise a message string
            describing why the call was refused or failed. Never raises for
            tool errors.
        """
        log_client.write(trace_id, "INFO", {"tool": tool_name, "args": raw_args})
        # Permission check
        if not check_tool_auth(tool_name, user_role):
            log_client.write(trace_id, "WARN", {"msg": "无权限调用"})
            return f"权限不足,无法使用{tool_name}"
        # Unknown tool: previously this surfaced as a misleading
        # "参数格式错误" (from a KeyError) after a breaker had already been
        # allocated for the bogus name.
        entry = self.tool_registry.get(tool_name)
        if entry is None:
            log_client.write(trace_id, "WARN", {"msg": f"未知工具:{tool_name}"})
            return f"未知工具:{tool_name}"
        # Circuit-breaker check
        breaker = get_breaker(tool_name)
        if not await breaker.can_run():
            log_client.write(trace_id, "WARN", {"msg": "工具熔断繁忙"})
            return f"{tool_name}当前繁忙,请稍后重试"
        # Argument validation
        try:
            params = entry["model"](**raw_args)
        except Exception as e:
            log_client.write(trace_id, "ERROR", {"err": f"参数校验失败:{str(e)}"})
            return f"参数格式错误:{str(e)}"

        # Execution with retry
        async def task():
            return await entry["func"](**params.model_dump())

        try:
            res = await backoff_retry(task)
        except Exception as e:
            breaker.fail()
            log_client.write(trace_id, "ERROR", {"err": str(e)})
            return f"工具执行异常:{str(e)}"
        breaker.success()
        log_client.write(trace_id, "INFO", {"tool_result": str(res)[:300]})
        return res


gateway = ToolGateway()
6 memory_store.py
import aioredis
import json
from typing import List, Dict
from dotenv import load_dotenv
import os
from llm_client import llm_client
load_dotenv()
class RedisChatMemory:
    """Redis-backed chat history with window trimming and LLM summarisation.

    Each session is a Redis list of JSON-encoded ``{"role", "content"}``
    messages stored under ``chat:session:<sid>``.
    """

    def __init__(self):
        self.redis_url = os.getenv("REDIS_URL")
        self.redis = None  # set by connect()
        self.expire = 7 * 24 * 3600  # key TTL in seconds, refreshed on append
        self.token_safe = 1800  # estimated-token budget before compression
        self.keep_round = 3  # most recent non-system messages kept verbatim

    async def connect(self):
        """Create the Redis client from REDIS_URL."""
        self.redis = aioredis.from_url(self.redis_url, decode_responses=True)

    async def close(self):
        """Close the Redis client if one was created."""
        if self.redis:
            await self.redis.close()

    def get_key(self, sid: str):
        """Return the Redis key holding session ``sid``."""
        return f"chat:session:{sid}"

    async def load_history(self, sid: str) -> List[Dict]:
        """Return every stored message of the session, oldest first."""
        data = await self.redis.lrange(self.get_key(sid), 0, -1)
        return [json.loads(item) for item in data]

    async def append_msg(self, sid: str, role: str, content: str):
        """Append one message to the session and refresh the key's TTL."""
        key = self.get_key(sid)
        msg = json.dumps({"role": role, "content": content})
        await self.redis.rpush(key, msg)
        await self.redis.expire(key, self.expire)

    def calc_token(self, msg_list: List[Dict]) -> int:
        """Estimate token usage as twice the total content length.

        Missing or ``None`` content counts as empty rather than raising.
        """
        return sum(len(m.get("content") or "") * 2 for m in msg_list)

    @staticmethod
    def _split_system(msg_list: List[Dict]):
        """Split into (last system message or None, non-system messages)."""
        system_msg = None
        others = []
        for m in msg_list:
            if m["role"] == "system":
                system_msg = m
            else:
                others.append(m)
        return system_msg, others

    def slide_trim(self, msg_list: List[Dict]) -> List[Dict]:
        """Keep the system message plus the last ``keep_round`` other messages."""
        system_msg, others = self._split_system(msg_list)
        recent = others[-self.keep_round:]
        if system_msg:
            return [system_msg] + recent
        return recent

    async def compress_summary(self, msg_list: List[Dict]) -> List[Dict]:
        """Replace older messages with an LLM-written summary.

        The last ``keep_round`` non-system messages stay verbatim; everything
        before them is summarised into one extra system message. The input is
        returned unchanged when there is nothing older to summarise.
        """
        system_msg, history = self._split_system(msg_list)
        if len(history) <= self.keep_round:
            return msg_list
        old = history[:-self.keep_round]
        recent = history[-self.keep_round:]
        old_text = "\n".join([f"{m['role']}:{m['content']}" for m in old])
        sum_msg = [{"role":"user", "content":f"精简对话摘要,保留数字、关键业务信息:{old_text}"}]
        # Await first, subscript second: `await f(...)[...]` subscripts the
        # coroutine object and raises TypeError.
        resp = await llm_client.chat_sync(sum_msg, temperature=0.0)
        summary = resp["choices"][0]["message"]["content"]
        new_msg = []
        if system_msg:
            new_msg.append(system_msg)
        new_msg.append({"role":"system", "content":f"历史对话摘要:{summary}"})
        new_msg.extend(recent)
        return new_msg

    async def auto_compress(self, msg_list: List[Dict]) -> List[Dict]:
        """Shrink the history only as much as needed to fit ``token_safe``.

        Tries, in order: no change, sliding-window trim, LLM summary.
        """
        if self.calc_token(msg_list) < self.token_safe:
            return msg_list
        trim_res = self.slide_trim(msg_list)
        if self.calc_token(trim_res) < self.token_safe:
            return trim_res
        return await self.compress_summary(msg_list)


memory = RedisChatMemory()
7 agent_core.py
import json
from typing import List, Dict
from llm_client import llm_client
from tool_gateway import gateway
from memory_store import memory
from middleware import global_bucket, log_client
from security import desensitize
class ReActAgent:
    """ReAct loop: the model picks tools, the gateway runs them, and the
    observations are fed back until the model stops calling tools, the
    reflection step judges the information sufficient, or ``max_loop`` rounds
    have run."""

    def __init__(self):
        self.max_loop = 4  # upper bound on tool-calling rounds per request
        self.system_prompt = """
你是ReAct智能体,严格使用提供工具完成任务,禁止编造信息。
可用工具清单:
1. calculator:数学加减乘除计算
2. text_embedding:批量文本转为语义向量(底层工具,检索/入库自动调用)
3. vector_add:将文档存入Chroma持久知识库(管理员权限)
4. rag_search:基于Chroma知识库检索资料,自动生成查询向量
规则:
- 数学运算必须调用calculator
- 查询相关知识调用rag_search
- 需要保存资料调用vector_add
- 禁止虚构无依据内容
"""
        self.tools_schema = gateway.get_tools_schema()

    async def reflect(self, query: str, info: Dict) -> bool:
        """Ask the model whether ``info`` is enough to answer ``query``.

        Returns True when the reply contains "true" (case-insensitive).
        """
        info_text = "\n".join([f"{k}:{v}" for k, v in info.items()])
        prompt = f"仅输出true/false,判断现有信息是否足够回答用户问题:问题:{query} 已有资料:{info_text}"
        resp = await llm_client.chat_sync([{"role":"user","content":prompt}], temperature=0.0)
        verdict = resp["choices"][0]["message"]["content"] or ""
        return "true" in verdict.lower()

    async def run_chat(self, session_id: str, user_role: str, user_input: str, trace_id: str):
        """Answer one user turn, calling tools as the model requests them.

        Args:
            session_id: key of the conversation in the memory store.
            user_role: role forwarded to the gateway's permission check.
            user_input: the sanitised user prompt.
            trace_id: request trace id used for logging.

        Returns:
            Dict with ``trace_id``, ``tool_record`` (latest observation per
            tool name) and the desensitised ``answer``.
        """
        history = await memory.load_history(session_id)
        base_msg = [{"role":"system", "content": self.system_prompt}] + history
        base_msg.append({"role":"user", "content": user_input})
        msg_list = await memory.auto_compress(base_msg)
        tool_record = {}
        for _ in range(self.max_loop):
            resp = await llm_client.chat_sync(msg_list, tools=self.tools_schema)
            msg = resp["choices"][0]["message"]
            tool_calls = msg.get("tool_calls")
            if not tool_calls:
                break
            msg_list.append(msg)
            # Answer every tool call in the assistant message. The original
            # handled only the first one, leaving the remaining tool_call ids
            # without the tool message that is expected to follow them.
            for tc in tool_calls:
                t_name = tc["function"]["name"]
                try:
                    t_args = json.loads(tc["function"]["arguments"] or "{}")
                except json.JSONDecodeError as e:
                    # Report bad arguments to the model instead of crashing.
                    obs = f"参数格式错误:{str(e)}"
                else:
                    obs = await gateway.run_tool(t_name, t_args, trace_id, user_role)
                if not isinstance(obs, str):
                    # e.g. text_embedding returns a list of vectors; the tool
                    # message content is sent as a string.
                    obs = json.dumps(obs, ensure_ascii=False)
                tool_record[t_name] = obs
                msg_list.append({"role":"tool", "tool_call_id":tc["id"], "content":obs})
            if await self.reflect(user_input, tool_record):
                break
        # Await first, subscript second: `await f(...)[...]` subscripts the
        # coroutine object and raises TypeError.
        final_resp = await llm_client.chat_sync(msg_list, temperature=0.1)
        final_raw = final_resp["choices"][0]["message"]["content"] or ""
        final_ans = desensitize(final_raw)
        # The memory store's method is append_msg; `memory.append` does not exist.
        await memory.append_msg(session_id, "user", user_input)
        await memory.append_msg(session_id, "assistant", final_ans)
        log_client.write(trace_id, "INFO", {"final_answer": final_ans[:300]})
        return {
            "trace_id": trace_id,
            "tool_record": tool_record,
            "answer": final_ans
        }


react_agent = ReActAgent()
8 main.py
from fastapi import FastAPI, Query
import asyncio
from agent_core import react_agent
from memory_store import memory
from middleware import global_bucket, create_trace_id, log_client
from security import input_verify
app = FastAPI(title="Day9 全工具化Agent|Chroma持久向量库")
@app.on_event("startup")
async def startup():
    """Open the chat-memory Redis connection when the application starts."""
    await memory.connect()


@app.on_event("shutdown")
async def shutdown():
    """Close the chat-memory Redis connection when the application stops."""
    await memory.close()
@app.get("/agent/chat")
async def chat_api(
    session_id: str = Query(...),
    user_role: str = Query(default="user", description="user/admin/guest"),
    prompt: str = Query(...)
):
    """Chat endpoint: trace id -> input check -> rate limit -> agent run.

    Returns the agent's result dict, or ``{"trace_id", "answer"}`` carrying a
    refusal message when the input is rejected or the rate limit is hit.
    """
    # NOTE(review): user_role comes straight from the query string, so any
    # caller can claim "admin". Derive the role from an authenticated identity
    # before exposing this endpoint.
    trace_id = create_trace_id()
    log_client.write(trace_id, "INFO", {"session_id": session_id, "input": prompt})
    # Input safety check
    ok, safe_text = await input_verify(prompt)
    if not ok:
        return {"trace_id": trace_id, "answer": safe_text}
    # Rate limiting. The imported limiter is `global_bucket`; the original
    # referenced the undefined name `global_b`, failing every request.
    if not await global_bucket.get_token():
        log_client.write(trace_id, "WARN", {"msg": "触发限流"})
        return {"trace_id": trace_id, "answer": "当前访问繁忙,请稍后重试"}
    # Full agent flow
    result = await react_agent.run_chat(session_id, user_role, safe_text, trace_id)
    return result
if __name__ == "__main__":
    import uvicorn

    # With reload=True the application must be given as an import string of
    # the form "<module>:<attribute>"; "main.py" is a file name, not one.
    uvicorn.run("main:app", reload=True)
五、实操练习任务
- 配置.env密钥,启动Redis,运行项目,Chroma自动生成本地持久文件夹
- 管理员角色提问:vector_add 文本=RAG依靠Embedding生成向量存入Chroma持久知识库 source=学习文档,验证文档入库
- 普通用户提问:什么是RAG,自动调用rag_search(底层调用text_embedding)检索Chroma
- 提问:(256+44)*7,触发calculator工具
- 高频请求测试令牌桶限流;多次制造Embedding报错验证熔断
- guest角色尝试vector_add,验证管理员权限拦截
- 重启程序,再次检索,验证Chroma向量数据不丢失
六、配套面试题
基础问答
- 为什么把文档入库、向量化、检索全部封装为工具?
- Chroma对比FAISS/内存向量池优势?
- 工具嵌套调用执行逻辑是什么?
- 四类工具分别对应的权限等级?
- Embedding工具为什么也要做限流熔断?
工程实操题
- 如何保证所有向量操作都经过统一中间件监控?
- Chroma持久化后如何做文档更新/删除工具?
- ReAct如何区分普通查询和新增知识库操作?
- 向量相关工具出现连续429限流如何处理?
拓展思考题
- 如何给rag_search增加MMR去重参数,做成可配置工具参数?
- 分布式部署如何替换本地Chroma为远程Chroma服务?
- 如何增加批量文档入库工具,一次性存入多条文本?
学习总结
本版完全满足两点核心要求:
- 所有向量、计算、知识库操作全部标准化Function工具,无硬编码业务逻辑,模型自主调度;
- 向量存储使用Chroma本地持久向量库;
整套架构生产可用,限流、熔断、安全、日志对全部工具统一覆盖,工具嵌套执行贴合真实Agent业务场景。
学向勤中得,萤窗万卷书。

浙公网安备 33010602011771号