Basic RAG Implementation, the Best Way to Get Started (Part 10)
Context Compression for RAG Systems
This post adds a context compression technique to make our RAG system more efficient: retrieved chunks are filtered and compressed so that only the most relevant parts are kept, which reduces noise and improves answer quality.
When retrieving documents for RAG, we often get chunks that contain both relevant and irrelevant information. Context compression helps us (a minimal sketch follows this list):
- remove irrelevant sentences and paragraphs
- focus only on the information that relates to the query
- maximize the useful signal inside the context window
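To make the idea concrete, here is a minimal sketch (my illustration, not part of the original code) of where compression sits in the pipeline: retrieve, then compress, then generate. The substring filter and the name compress_chunks are hypothetical stand-ins for the LLM-based compressor implemented below.
def compress_chunks(chunks, query_keywords):
    # Hypothetical stand-in for the LLM-based compressor implemented later:
    # keep only sentences that mention at least one query keyword.
    compressed = []
    for chunk in chunks:
        sentences = [s for s in chunk.split("。") if s]
        kept = [s for s in sentences if any(kw in s for kw in query_keywords)]
        if kept:
            compressed.append("。".join(kept) + "。")
    return compressed

if __name__ == "__main__":
    chunks = [
        "Java面试常考HashMap的底层原理。今天中午吃了面条。",
        "这一段和问题完全无关。",
    ]
    print(compress_chunks(chunks, ["Java", "面试", "HashMap"]))
    # -> ['Java面试常考HashMap的底层原理。']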
Implementation
PDF text extraction
Extract all text from a PDF file
def extract_text_from_pdf(pdf_path):
"""
从PDF文件中提取全部文本
:param pdf_path: PDF文件路径
:return: 提取的文本内容(str)
"""
print(f"[步骤] 正在从PDF提取文本: {pdf_path}")
with open(pdf_path, 'rb') as f:
reader = PdfReader(f)
text = ""
for i, page in enumerate(reader.pages):
page_text = page.extract_text()
if page_text:
text += page_text
print(f" - 已提取第{i+1}页")
print(f"[完成] PDF文本提取完成,总长度: {len(text)} 字符\n")
return text
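A small aside on this helper: PyPDF2's PdfReader also accepts a file path directly, so the explicit open() is optional, and extract_text() may return nothing for scanned or image-only pages, which the if page_text: check already guards against. A shorter equivalent (a sketch with a hypothetical name, same behavior minus the progress prints) could look like this:
from PyPDF2 import PdfReader

def extract_text_from_pdf_short(pdf_path):
    # PdfReader accepts a path directly; "or ''" guards against pages with no extractable text.
    reader = PdfReader(pdf_path)
    return "".join((page.extract_text() or "") for page in reader.pages)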
Text chunking
Split the text into overlapping chunks
def chunk_text(text, n=1000, overlap=200):
"""
将文本分割为带重叠的块
:param text: 原始文本
:param n: 每块字符数
:param overlap: 块间重叠字符数
:return: 文本块列表
"""
print(f"[分块] 每块{n}字符,重叠{overlap}字符")
chunks = []
for i in range(0, len(text), n - overlap):
chunks.append(text[i:i + n])
print(f"[分块] 完成,共{len(chunks)}块\n")
return chunks
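The window advances n - overlap characters per step. With the defaults (1000-character chunks, 200-character overlap) that is a step of 800, so the 6984-character document in the run log below yields 9 chunks, the last one 6984 - 6400 = 584 characters long, matching the output shown later. A quick sanity check, assuming the chunk_text above is in scope:
# Start offsets are 0, 800, ..., 6400 -> 9 chunks; last chunk = 6984 - 6400 = 584 chars.
text = "x" * 6984
chunks = chunk_text(text, n=1000, overlap=200)
assert len(chunks) == 9
assert len(chunks[-1]) == 584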
Embedding generation
Generate embeddings in batch with Alibaba's text-embedding model
def create_embeddings(texts, model=EMBEDDING_MODEL):
"""
用阿里embedding模型批量生成文本向量
:param texts: 文本列表
:param model: 嵌入模型名
:return: 向量列表
"""
if isinstance(texts, str):
texts = [texts]
print(f"[嵌入生成] 正在生成{len(texts)}条文本的向量...")
try:
response = TextEmbedding.call(
model=model,
input=texts,
api_key=ALI_API_KEY
)
if response.status_code == 200:
embeddings = [np.array(item['embedding']) for item in response.output['embeddings']]
print(f"[嵌入生成] 成功,返回{len(embeddings)}条向量\n")
return embeddings
else:
print(f"[嵌入生成] 失败: {response.message}")
return [np.zeros(1536)] * len(texts)
except Exception as e:
print(f"[嵌入生成] 异常: {e}")
return [np.zeros(1536)] * len(texts)
Simple vector store
A simple class for vector storage and similarity search
class SimpleVectorStore:
"""
简单的向量存储与检索类
"""
def __init__(self):
self.vectors = []
self.texts = []
self.metadata = []
def add_item(self, text, embedding, metadata=None):
self.vectors.append(np.array(embedding))
self.texts.append(text)
self.metadata.append(metadata or {})
def similarity_search(self, query_embedding, k=5):
if not self.vectors:
return []
query_vector = np.array(query_embedding)
similarities = []
for i, vector in enumerate(self.vectors):
sim = np.dot(query_vector, vector) / (np.linalg.norm(query_vector) * np.linalg.norm(vector))
similarities.append((i, sim))
similarities.sort(key=lambda x: x[1], reverse=True)
results = []
for i in range(min(k, len(similarities))):
idx, score = similarities[i]
results.append({
"text": self.texts[idx],
"metadata": self.metadata[idx],
"similarity": score
})
return results
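One edge case worth flagging: when create_embeddings fails it returns zero vectors, and the cosine similarity above then divides by a zero norm, yielding NaN (plus a NumPy warning). If you want to harden the store, a guarded cosine helper is a minimal fix; safe_cosine is my addition, not part of the original class:
import numpy as np

def safe_cosine(a, b, eps=1e-10):
    # Returns 0.0 instead of NaN when either vector has (near-)zero norm.
    a, b = np.asarray(a, dtype=float), np.asarray(b, dtype=float)
    denom = np.linalg.norm(a) * np.linalg.norm(b)
    return float(np.dot(a, b) / denom) if denom > eps else 0.0

# Drop-in usage inside SimpleVectorStore.similarity_search:
#     sim = safe_cosine(query_vector, vector)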
Document processing pipeline
Process a PDF document: extract the text, chunk it, generate embeddings, and build the vector store
def process_document(pdf_path, chunk_size=1000, chunk_overlap=200):
"""
处理PDF文档,提取文本、分块、生成向量并构建向量库。
"""
print("[主流程] 开始处理文档...")
extracted_text = extract_text_from_pdf(pdf_path)
text_chunks = chunk_text(extracted_text, chunk_size, chunk_overlap)
print("[主流程] 初始化向量库...")
vector_store = SimpleVectorStore()
print("[主流程] 为每个块生成向量...")
chunk_embeddings = create_embeddings(text_chunks)
for i, (chunk, embedding) in enumerate(zip(text_chunks, chunk_embeddings)):
print(f"[块{i+1}/{len(text_chunks)}] 已生成向量,长度: {len(chunk)} 字符")
vector_store.add_item(chunk, embedding, {"type": "chunk", "index": i})
print("[主流程] 文档处理完毕,向量库构建完成\n")
return vector_store
Compression function
Use an LLM to compress a single chunk, keeping only the content relevant to the query
def compress_chunk(chunk, query, compression_type="summary", model=LLM_MODEL):
"""
用LLM压缩单个chunk,只保留与query相关内容
:param chunk: 文本块
:param query: 用户问题
:param compression_type: 压缩类型(selective/summary/extraction)
:param model: LLM模型名
:return: (压缩后文本, 压缩比)
"""
if compression_type == "selective":
system_prompt = """你是信息过滤专家,只保留与用户问题直接相关的句子或段落,删除无关内容。输出仅包含有助于回答问题的原文内容,顺序不变,无需评论。"""
elif compression_type == "summary":
system_prompt = """你是摘要专家,请针对用户问题对文本块进行简明摘要,只保留相关信息,省略无关细节。"""
else:
system_prompt = """你是信息抽取专家,只提取与用户问题直接相关的原文句子,逐句输出,无需评论。"""
user_prompt = f"""
问题: {query}
文档内容:
{chunk}
请只输出与问题相关的内容。"""
try:
response = Generation.call(
model=model,
messages=[
{"role": "system", "content": system_prompt},
{"role": "user", "content": user_prompt}
],
api_key=ALI_API_KEY,
result_format='message'
)
if response.status_code == 200:
compressed_chunk = response.output.choices[0].message.content.strip()
else:
print(f"[压缩] LLM调用失败: {response.message}")
compressed_chunk = ""
except Exception as e:
print(f"[压缩] LLM调用异常: {e}")
compressed_chunk = ""
original_length = len(chunk)
compressed_length = len(compressed_chunk)
compression_ratio = (original_length - compressed_length) / original_length * 100 if original_length > 0 else 0
print(f"[压缩] 原长: {original_length},压缩后: {compressed_length},压缩比: {compression_ratio:.2f}%")
return compressed_chunk, compression_ratio
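A quick usage example, assuming ALI_API_KEY is configured and the dashscope calls above succeed; the sample chunk and query are made up for illustration:
# Illustrative call; requires a valid ALI_API_KEY and network access.
sample_chunk = (
    "HashMap在JDK8中由数组+链表+红黑树实现。"
    "另外,公司食堂周五供应咖喱饭。"
)
compressed, ratio = compress_chunk(
    sample_chunk,
    query="HashMap的底层结构是什么?",
    compression_type="selective",
)
print(compressed)              # expected: only the HashMap sentence survives
print(f"压缩比: {ratio:.1f}%")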
Batch compression
Compress a list of text chunks in batch
def batch_compress_chunks(chunks, query, compression_type="selective", model=LLM_MODEL):
"""
批量压缩文本块
:param chunks: 文本块列表
:param query: 用户问题
:param compression_type: 压缩类型
:param model: LLM模型名
:return: [(压缩后文本, 压缩比)]
"""
print(f"[批量压缩] 共{len(chunks)}块,类型: {compression_type}")
results = []
total_original = 0
total_compressed = 0
for i, chunk in enumerate(chunks):
print(f"[批量压缩] 正在压缩第{i+1}/{len(chunks)}块...")
compressed, ratio = compress_chunk(chunk, query, compression_type, model)
results.append((compressed, ratio))
total_original += len(chunk)
total_compressed += len(compressed)
overall_ratio = (total_original - total_compressed) / total_original * 100 if total_original > 0 else 0
print(f"[批量压缩] 总体压缩比: {overall_ratio:.2f}%\n")
return results
Answer generation with the LLM
def generate_response(query, context, model=LLM_MODEL):
"""
用大模型基于上下文生成回答
:param query: 用户问题
:param context: 上下文
:param model: 生成模型名
:return: 回答内容
"""
print("[流程] 正在调用大模型生成最终回答...")
system_prompt = "你是一个AI助手,只能基于给定上下文回答问题。如果上下文无法直接回答,请回复:'信息不足,无法回答。'"
user_prompt = f"""
上下文:\n{context}\n\n问题:{query}\n\n请只基于上述上下文简明准确作答。"""
try:
response = Generation.call(
model=model,
messages=[
{"role": "system", "content": system_prompt},
{"role": "user", "content": user_prompt}
],
api_key=ALI_API_KEY,
result_format='message'
)
if response.status_code == 200:
print("[流程] 回答生成成功\n")
return response.output.choices[0].message.content.strip()
else:
print(f"[流程] 回答生成失败: {response.message}")
return ""
except Exception as e:
print(f"[流程] 回答生成异常: {e}")
return ""
RAG pipeline with context compression
def rag_with_compression(pdf_path, query, k=10, compression_type="selective", model=LLM_MODEL):
"""
上下文压缩增强RAG主流程
:param pdf_path: PDF路径
:param query: 用户问题
:param k: 检索top-k块
:param compression_type: 压缩类型
:param model: LLM模型名
:return: 结果字典
"""
print("\n=== 上下文压缩增强RAG流程开始 ===")
print(f"[输入] 问题: {query}")
print(f"[输入] 压缩类型: {compression_type}")
vector_store = process_document(pdf_path)
query_embedding = create_embeddings([query])[0]
print(f"[检索] 正在检索top-{k}相关块...")
results = vector_store.similarity_search(query_embedding, k=k)
retrieved_chunks = [r["text"] for r in results]
print(f"[检索] 已获取{len(retrieved_chunks)}个相关块\n")
compressed_results = batch_compress_chunks(retrieved_chunks, query, compression_type, model)
compressed_chunks = [r[0] for r in compressed_results]
compression_ratios = [r[1] for r in compressed_results]
filtered_chunks = [(c, r) for c, r in zip(compressed_chunks, compression_ratios) if c.strip()]
if not filtered_chunks:
print("[警告] 所有块均被压缩为空,回退使用原始块。")
filtered_chunks = [(c, 0.0) for c in retrieved_chunks]
compressed_chunks, compression_ratios = zip(*filtered_chunks)
context = "\n\n---\n\n".join(compressed_chunks)
print("[流程] 正在生成最终回答...")
response = generate_response(query, context, model)
result = {
"query": query,
"original_chunks": retrieved_chunks,
"compressed_chunks": compressed_chunks,
"compression_ratios": compression_ratios,
"context_length_reduction": f"{sum(compression_ratios)/len(compression_ratios):.2f}%",
"response": response
}
print("\n=== 最终AI回答 ===")
print(response)
print("=== 上下文压缩增强RAG流程结束 ===\n")
return result
Standard RAG pipeline
def standard_rag(pdf_path, query, k=10, model=LLM_MODEL):
"""
标准RAG主流程(无压缩)
:param pdf_path: PDF路径
:param query: 用户问题
:param k: 检索top-k块
:param model: LLM模型名
:return: 结果字典
"""
print("\n=== 标准RAG流程开始 ===")
print(f"[输入] 问题: {query}")
vector_store = process_document(pdf_path)
query_embedding = create_embeddings([query])[0]
print(f"[检索] 正在检索top-{k}相关块...")
results = vector_store.similarity_search(query_embedding, k=k)
retrieved_chunks = [r["text"] for r in results]
print(f"[检索] 已获取{len(retrieved_chunks)}个相关块\n")
context = "\n\n---\n\n".join(retrieved_chunks)
print("[流程] 正在生成最终回答...")
response = generate_response(query, context, model)
result = {
"query": query,
"chunks": retrieved_chunks,
"response": response
}
print("\n=== 最终AI回答 ===")
print(response)
print("=== 标准RAG流程结束 ===\n")
return result
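With both pipelines defined, a natural follow-up is to run them side by side on the same question and compare context size and answers. A minimal comparison sketch (compare_rag is my helper; it rebuilds the vector store once per pipeline, which is wasteful but keeps the sketch simple):
def compare_rag(pdf_path, query, k=8):
    # Run standard RAG and compression RAG on the same query and compare.
    plain = standard_rag(pdf_path, query, k=k)
    compressed = rag_with_compression(pdf_path, query, k=k, compression_type="selective")
    plain_len = sum(len(c) for c in plain["chunks"])
    comp_len = sum(len(c) for c in compressed["compressed_chunks"])
    print(f"上下文长度: 标准RAG {plain_len} 字符 vs 压缩RAG {comp_len} 字符")
    print("标准RAG回答:", plain["response"])
    print("压缩RAG回答:", compressed["response"])

# compare_rag("data/2888年Java程序员找工作最新场景题.pdf", "Java程序员面试中常见的技术问题有哪些?")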
Execution results
========== 上下文压缩增强RAG主流程演示 ==========
[配置] 使用API密钥: sk-fc6ad...2f23
[配置] PDF路径: data/2888年Java程序员找工作最新场景题.pdf
[配置] 问题: Java程序员面试中常见的技术问题有哪些?
[配置] 压缩类型: selective
[配置] top-k: 8
=== 上下文压缩增强RAG流程开始 ===
[输入] 问题: Java程序员面试中常见的技术问题有哪些?
[输入] 压缩类型: selective
[主流程] 开始处理文档...
[步骤] 正在从PDF提取文本: data/2888年Java程序员找工作最新场景题.pdf
- 已提取第1页
- 已提取第2页
- 已提取第3页
- 已提取第4页
- 已提取第5页
- 已提取第6页
- 已提取第7页
- 已提取第8页
- 已提取第9页
- 已提取第10页
[完成] PDF文本提取完成,总长度: 6984 字符
[分块] 每块1000字符,重叠200字符
[分块] 完成,共9块
[主流程] 初始化向量库...
[主流程] 为每个块生成向量...
[嵌入生成] 正在生成9条文本的向量...
[嵌入生成] 成功,返回9条向量
[块1/9] 已生成向量,长度: 1000 字符
[块2/9] 已生成向量,长度: 1000 字符
[块3/9] 已生成向量,长度: 1000 字符
[块4/9] 已生成向量,长度: 1000 字符
[块5/9] 已生成向量,长度: 1000 字符
[块6/9] 已生成向量,长度: 1000 字符
[块7/9] 已生成向量,长度: 1000 字符
[块8/9] 已生成向量,长度: 1000 字符
[块9/9] 已生成向量,长度: 584 字符
[主流程] 文档处理完毕,向量库构建完成
[嵌入生成] 正在生成1条文本的向量...
[嵌入生成] 成功,返回1条向量
[检索] 正在检索top-8相关块...
[检索] 已获取8个相关块
[批量压缩] 共8块,类型: selective
[批量压缩] 正在压缩第1/8块...
[压缩] 原长: 1000,压缩后: 146,压缩比: 85.40%
[批量压缩] 正在压缩第2/8块...
[压缩] 原长: 1000,压缩后: 155,压缩比: 84.50%
[批量压缩] 正在压缩第3/8块...
[压缩] 原长: 1000,压缩后: 74,压缩比: 92.60%
[批量压缩] 正在压缩第4/8块...
[压缩] 原长: 1000,压缩后: 82,压缩比: 91.80%
[批量压缩] 正在压缩第5/8块...
[压缩] 原长: 1000,压缩后: 55,压缩比: 94.50%
[批量压缩] 正在压缩第6/8块...
[压缩] 原长: 1000,压缩后: 36,压缩比: 96.40%
[批量压缩] 正在压缩第7/8块...
[压缩] 原长: 1000,压缩后: 53,压缩比: 94.70%
[批量压缩] 正在压缩第8/8块...
[压缩] 原长: 1000,压缩后: 30,压缩比: 97.00%
[批量压缩] 总体压缩比: 92.11%
[流程] 正在生成最终回答...
[流程] 正在调用大模型生成最终回答...
[流程] 回答生成成功
=== 最终AI回答 ===
信息不足,无法回答。
=== 上下文压缩增强RAG流程结束 ===
========== 演示结束 ==========
进程已结束,退出代码为 0
After compression, so much was stripped out that, combined with the strict system prompt, the model could only reply "insufficient information, cannot answer". One way to handle this is sketched below.
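A pragmatic mitigation, assuming the functions above are in scope: detect the "insufficient information" reply and retry with the original, uncompressed chunks (or switch to summary-type compression). answer_with_fallback is my illustration, not part of the original pipeline:
def answer_with_fallback(pdf_path, query, k=8):
    # Try compression RAG first; if the model says the context is insufficient,
    # fall back to standard RAG over the uncompressed chunks.
    result = rag_with_compression(pdf_path, query, k=k, compression_type="selective")
    if "信息不足" in result["response"]:
        print("[回退] 压缩后上下文不足,改用未压缩的标准RAG重试...")
        result = standard_rag(pdf_path, query, k=k)
    return result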

Complete code
# -*- coding: utf-8 -*-
"""
上下文压缩增强RAG主流程(阿里大模型版,详细中文注释+详细控制台输出)
"""
import os
import numpy as np
from PyPDF2 import PdfReader
from dashscope import Generation, TextEmbedding
import json
import sys
# ========== 密钥配置:优先从api_keys.py读取,否则用环境变量 ==========
try:
    from test.api_keys import ALI_API_KEY
except Exception:
    ALI_API_KEY = os.getenv("ALI_API_KEY", "")
if not ALI_API_KEY:
    print("[错误] 未找到API密钥,请在test/api_keys.py或环境变量中配置ALI_API_KEY!")
    sys.exit(1)
# ==============================================
LLM_MODEL = "qwen-max" # 通义千问主力模型
EMBEDDING_MODEL = "text-embedding-v2" # 阿里云嵌入模型
# ========== PDF文本提取 ==========
def extract_text_from_pdf(pdf_path):
"""
从PDF文件中提取全部文本
:param pdf_path: PDF文件路径
:return: 提取的文本内容(str)
"""
print(f"[步骤] 正在从PDF提取文本: {pdf_path}")
with open(pdf_path, 'rb') as f:
reader = PdfReader(f)
text = ""
for i, page in enumerate(reader.pages):
page_text = page.extract_text()
if page_text:
text += page_text
print(f" - 已提取第{i+1}页")
print(f"[完成] PDF文本提取完成,总长度: {len(text)} 字符\n")
return text
# ========== 文本分块 ==========
def chunk_text(text, n=1000, overlap=200):
"""
将文本分割为带重叠的块
:param text: 原始文本
:param n: 每块字符数
:param overlap: 块间重叠字符数
:return: 文本块列表
"""
print(f"[分块] 每块{n}字符,重叠{overlap}字符")
chunks = []
for i in range(0, len(text), n - overlap):
chunks.append(text[i:i + n])
print(f"[分块] 完成,共{len(chunks)}块\n")
return chunks
# ========== 向量生成 ==========
def create_embeddings(texts, model=EMBEDDING_MODEL):
"""
用阿里embedding模型批量生成文本向量
:param texts: 文本列表
:param model: 嵌入模型名
:return: 向量列表
"""
if isinstance(texts, str):
texts = [texts]
print(f"[嵌入生成] 正在生成{len(texts)}条文本的向量...")
try:
response = TextEmbedding.call(
model=model,
input=texts,
api_key=ALI_API_KEY
)
if response.status_code == 200:
embeddings = [np.array(item['embedding']) for item in response.output['embeddings']]
print(f"[嵌入生成] 成功,返回{len(embeddings)}条向量\n")
return embeddings
else:
print(f"[嵌入生成] 失败: {response.message}")
return [np.zeros(1536)] * len(texts)
except Exception as e:
print(f"[嵌入生成] 异常: {e}")
return [np.zeros(1536)] * len(texts)
# ========== 简单向量库 ==========
class SimpleVectorStore:
"""
简单的向量存储与检索类
"""
def __init__(self):
self.vectors = []
self.texts = []
self.metadata = []
def add_item(self, text, embedding, metadata=None):
self.vectors.append(np.array(embedding))
self.texts.append(text)
self.metadata.append(metadata or {})
def similarity_search(self, query_embedding, k=5):
if not self.vectors:
return []
query_vector = np.array(query_embedding)
similarities = []
for i, vector in enumerate(self.vectors):
sim = np.dot(query_vector, vector) / (np.linalg.norm(query_vector) * np.linalg.norm(vector))
similarities.append((i, sim))
similarities.sort(key=lambda x: x[1], reverse=True)
results = []
for i in range(min(k, len(similarities))):
idx, score = similarities[i]
results.append({
"text": self.texts[idx],
"metadata": self.metadata[idx],
"similarity": score
})
return results
# ========== 文档处理主流程 ==========
def process_document(pdf_path, chunk_size=1000, chunk_overlap=200):
"""
处理PDF文档,提取文本、分块、生成向量并构建向量库。
"""
print("[主流程] 开始处理文档...")
extracted_text = extract_text_from_pdf(pdf_path)
text_chunks = chunk_text(extracted_text, chunk_size, chunk_overlap)
print("[主流程] 初始化向量库...")
vector_store = SimpleVectorStore()
print("[主流程] 为每个块生成向量...")
chunk_embeddings = create_embeddings(text_chunks)
for i, (chunk, embedding) in enumerate(zip(text_chunks, chunk_embeddings)):
print(f"[块{i+1}/{len(text_chunks)}] 已生成向量,长度: {len(chunk)} 字符")
vector_store.add_item(chunk, embedding, {"type": "chunk", "index": i})
print("[主流程] 文档处理完毕,向量库构建完成\n")
return vector_store
# ========== 压缩函数 ==========
def compress_chunk(chunk, query, compression_type="summary", model=LLM_MODEL):
"""
用LLM压缩单个chunk,只保留与query相关内容
:param chunk: 文本块
:param query: 用户问题
:param compression_type: 压缩类型(selective/summary/extraction)
:param model: LLM模型名
:return: (压缩后文本, 压缩比)
"""
if compression_type == "selective":
system_prompt = """你是信息过滤专家,只保留与用户问题直接相关的句子或段落,删除无关内容。输出仅包含有助于回答问题的原文内容,顺序不变,无需评论。"""
elif compression_type == "summary":
system_prompt = """你是摘要专家,请针对用户问题对文本块进行简明摘要,只保留相关信息,省略无关细节。"""
else:
system_prompt = """你是信息抽取专家,只提取与用户问题直接相关的原文句子,逐句输出,无需评论。"""
user_prompt = f"""
问题: {query}
文档内容:
{chunk}
请只输出与问题相关的内容。"""
try:
response = Generation.call(
model=model,
messages=[
{"role": "system", "content": system_prompt},
{"role": "user", "content": user_prompt}
],
api_key=ALI_API_KEY,
result_format='message'
)
if response.status_code == 200:
compressed_chunk = response.output.choices[0].message.content.strip()
else:
print(f"[压缩] LLM调用失败: {response.message}")
compressed_chunk = ""
except Exception as e:
print(f"[压缩] LLM调用异常: {e}")
compressed_chunk = ""
original_length = len(chunk)
compressed_length = len(compressed_chunk)
compression_ratio = (original_length - compressed_length) / original_length * 100 if original_length > 0 else 0
print(f"[压缩] 原长: {original_length},压缩后: {compressed_length},压缩比: {compression_ratio:.2f}%")
return compressed_chunk, compression_ratio
# ========== 批量压缩 ==========
def batch_compress_chunks(chunks, query, compression_type="selective", model=LLM_MODEL):
"""
批量压缩文本块
:param chunks: 文本块列表
:param query: 用户问题
:param compression_type: 压缩类型
:param model: LLM模型名
:return: [(压缩后文本, 压缩比)]
"""
print(f"[批量压缩] 共{len(chunks)}块,类型: {compression_type}")
results = []
total_original = 0
total_compressed = 0
for i, chunk in enumerate(chunks):
print(f"[批量压缩] 正在压缩第{i+1}/{len(chunks)}块...")
compressed, ratio = compress_chunk(chunk, query, compression_type, model)
results.append((compressed, ratio))
total_original += len(chunk)
total_compressed += len(compressed)
overall_ratio = (total_original - total_compressed) / total_original * 100 if total_original > 0 else 0
print(f"[批量压缩] 总体压缩比: {overall_ratio:.2f}%\n")
return results
# ========== LLM生成回答 ==========
def generate_response(query, context, model=LLM_MODEL):
"""
用大模型基于上下文生成回答
:param query: 用户问题
:param context: 上下文
:param model: 生成模型名
:return: 回答内容
"""
print("[流程] 正在调用大模型生成最终回答...")
system_prompt = "你是一个AI助手,只能基于给定上下文回答问题。如果上下文无法直接回答,请回复:'信息不足,无法回答。'"
user_prompt = f"""
上下文:\n{context}\n\n问题:{query}\n\n请只基于上述上下文简明准确作答。"""
try:
response = Generation.call(
model=model,
messages=[
{"role": "system", "content": system_prompt},
{"role": "user", "content": user_prompt}
],
api_key=ALI_API_KEY,
result_format='message'
)
if response.status_code == 200:
print("[流程] 回答生成成功\n")
return response.output.choices[0].message.content.strip()
else:
print(f"[流程] 回答生成失败: {response.message}")
return ""
except Exception as e:
print(f"[流程] 回答生成异常: {e}")
return ""
# ========== 上下文压缩增强RAG主流程 ==========
def rag_with_compression(pdf_path, query, k=10, compression_type="selective", model=LLM_MODEL):
"""
上下文压缩增强RAG主流程
:param pdf_path: PDF路径
:param query: 用户问题
:param k: 检索top-k块
:param compression_type: 压缩类型
:param model: LLM模型名
:return: 结果字典
"""
print("\n=== 上下文压缩增强RAG流程开始 ===")
print(f"[输入] 问题: {query}")
print(f"[输入] 压缩类型: {compression_type}")
vector_store = process_document(pdf_path)
query_embedding = create_embeddings([query])[0]
print(f"[检索] 正在检索top-{k}相关块...")
results = vector_store.similarity_search(query_embedding, k=k)
retrieved_chunks = [r["text"] for r in results]
print(f"[检索] 已获取{len(retrieved_chunks)}个相关块\n")
compressed_results = batch_compress_chunks(retrieved_chunks, query, compression_type, model)
compressed_chunks = [r[0] for r in compressed_results]
compression_ratios = [r[1] for r in compressed_results]
filtered_chunks = [(c, r) for c, r in zip(compressed_chunks, compression_ratios) if c.strip()]
if not filtered_chunks:
print("[警告] 所有块均被压缩为空,回退使用原始块。")
filtered_chunks = [(c, 0.0) for c in retrieved_chunks]
compressed_chunks, compression_ratios = zip(*filtered_chunks)
context = "\n\n---\n\n".join(compressed_chunks)
print("[流程] 正在生成最终回答...")
response = generate_response(query, context, model)
result = {
"query": query,
"original_chunks": retrieved_chunks,
"compressed_chunks": compressed_chunks,
"compression_ratios": compression_ratios,
"context_length_reduction": f"{sum(compression_ratios)/len(compression_ratios):.2f}%",
"response": response
}
print("\n=== 最终AI回答 ===")
print(response)
print("=== 上下文压缩增强RAG流程结束 ===\n")
return result
# ========== 标准RAG主流程 ==========
def standard_rag(pdf_path, query, k=10, model=LLM_MODEL):
"""
标准RAG主流程(无压缩)
:param pdf_path: PDF路径
:param query: 用户问题
:param k: 检索top-k块
:param model: LLM模型名
:return: 结果字典
"""
print("\n=== 标准RAG流程开始 ===")
print(f"[输入] 问题: {query}")
vector_store = process_document(pdf_path)
query_embedding = create_embeddings([query])[0]
print(f"[检索] 正在检索top-{k}相关块...")
results = vector_store.similarity_search(query_embedding, k=k)
retrieved_chunks = [r["text"] for r in results]
print(f"[检索] 已获取{len(retrieved_chunks)}个相关块\n")
context = "\n\n---\n\n".join(retrieved_chunks)
print("[流程] 正在生成最终回答...")
response = generate_response(query, context, model)
result = {
"query": query,
"chunks": retrieved_chunks,
"response": response
}
print("\n=== 最终AI回答 ===")
print(response)
print("=== 标准RAG流程结束 ===\n")
return result
# ========== main方法示例 ==========
def main():
"""
主方法示例:体验上下文压缩增强RAG
"""
# 路径配置(可根据实际情况修改)
pdf_path = "data/2888年Java程序员找工作最新场景题.pdf" # 示例PDF路径
query = "Java程序员面试中常见的技术问题有哪些?" # 示例问题
compression_type = "selective" # 可选: selective/summary/extraction
k = 8 # top-k检索块数
print("\n========== 上下文压缩增强RAG主流程演示 ==========")
print(f"[配置] 使用API密钥: {ALI_API_KEY[:8]}...{ALI_API_KEY[-4:]}")
print(f"[配置] PDF路径: {pdf_path}")
print(f"[配置] 问题: {query}")
print(f"[配置] 压缩类型: {compression_type}")
print(f"[配置] top-k: {k}\n")
rag_with_compression(pdf_path, query, k=k, compression_type=compression_type)
print("========== 演示结束 ==========")
if __name__ == "__main__":
main()
