软件研发 --- Dify 生成 PPT 方案分析
# 纯离线 AI LLM + 知识库生成高质量 PPT 的最佳方案
## 整体架构
```
┌─────────────────────────────────────────────────────┐
│ 用户交互层 │
│ (Web UI / CLI / Desktop) │
├─────────────────────────────────────────────────────┤
│ 编排调度层 │
│ (LangChain / LlamaIndex) │
├──────────┬──────────────┬───────────────────────────┤
│ 本地LLM │ RAG知识库 │ PPT生成引擎 │
│ (Ollama) │ (向量数据库) │ (python-pptx/模板引擎) │
├──────────┴──────────────┴───────────────────────────┤
│ 本地存储层 │
│ (文档/向量库/模板库/图片素材库) │
└─────────────────────────────────────────────────────┘
```
---
## 一、核心组件选型
### 1. 本地 LLM 推理引擎
| 方案 | 推荐模型 | 显存需求 | 特点 |
|------|---------|---------|------|
| **Ollama** (首推) | Qwen2.5-72B-Q4, DeepSeek-V2.5, Llama3.1-70B | 48GB+ | 部署最简单 |
| llama.cpp | 同上的GGUF格式 | 可CPU运行 | 灵活,支持CPU+GPU混合 |
| vLLM | 原始精度模型 | 80GB+ | 吞吐量最高 |
| LocalAI | 多种格式 | 灵活 | OpenAI API兼容 |
**推荐配置:**
```bash
# 安装 Ollama
curl -fsSL https://ollama.com/install.sh | sh
# 拉取推荐模型(中文能力强)
ollama pull qwen2.5:72b-instruct-q4_K_M # 高质量首选
ollama pull qwen2.5:32b-instruct-q4_K_M # 平衡之选
ollama pull deepseek-v2.5:236b-q4_K_M # 旗舰级(需多卡)
# 嵌入模型(用于知识库)
ollama pull bge-m3 # 中英文嵌入最佳
ollama pull nomic-embed-text # 轻量替代
```
### 2. 知识库 (RAG) 系统
```
文档 → 解析 → 分块 → 向量化 → 向量数据库 → 检索 → LLM生成
```
| 组件 | 推荐方案 | 说明 |
|------|---------|------|
| 文档解析 | **Unstructured** / MinerU / Docling | 支持PDF/Word/网页等 |
| 文本分块 | LangChain RecursiveCharacterTextSplitter | 按分隔符递归切分(尽量保持段落/句子完整,并非真正的语义分块) |
| 嵌入模型 | **BGE-M3** (本地) | 中英文双语最强 |
| 向量数据库 | **ChromaDB** / Milvus Lite / FAISS | 纯本地,无需服务 |
| RAG框架 | **LlamaIndex** / LangChain | 编排检索+生成 |
---
## 二、完整实现方案
### 项目结构
```
ppt-generator/
├── config/
│ └── settings.yaml # 配置文件
├── knowledge_base/
│ ├── documents/ # 原始文档
│ ├── vector_store/ # 向量数据库存储
│ └── kb_manager.py # 知识库管理
├── llm/
│ └── local_llm.py # LLM调用封装
├── ppt_engine/
│ ├── templates/ # PPT模板库
│ │ ├── business_blue.pptx
│ │ ├── tech_dark.pptx
│ │ └── minimal_white.pptx
│ ├── assets/ # 图标/图片素材
│ ├── content_generator.py # 内容生成
│ ├── layout_engine.py # 布局引擎
│ ├── chart_generator.py # 图表生成
│ └── ppt_builder.py # PPT组装
├── app.py # 主程序入口
├── web_ui.py # Gradio Web界面
└── requirements.txt
```
### 核心代码实现
#### `requirements.txt`
```txt
ollama
langchain>=0.2.0
langchain-community
llama-index>=0.10.0
chromadb>=0.4.0
python-pptx>=0.6.23
sentence-transformers
unstructured[all-docs]
pydantic>=2.0
gradio>=4.0
Pillow
matplotlib
plotly
kaleido
pyyaml
```
#### `config/settings.yaml`
```yaml
# Read by LocalLLM: model, base_url, temperature and num_ctx.
llm:
provider: "ollama"
model: "qwen2.5:32b-instruct-q4_K_M"
base_url: "http://localhost:11434"
temperature: 0.7
num_ctx: 8192
# NOTE(review): kb_manager.py stores this section but loads 'BAAI/bge-m3'
# through sentence-transformers directly, so these values do not appear to
# select the embedding model actually used -- confirm.
embedding:
provider: "ollama"
model: "bge-m3"
base_url: "http://localhost:11434"
# Read by KnowledgeBaseManager: chunking, default top_k and storage paths.
knowledge_base:
chunk_size: 1000
chunk_overlap: 200
top_k: 8
vector_store_path: "./knowledge_base/vector_store"
documents_path: "./knowledge_base/documents"
# NOTE(review): the code shown in this document does not appear to read this
# section (the output directory and colour scheme are passed explicitly) -- confirm.
ppt:
default_template: "business_blue"
templates_path: "./ppt_engine/templates"
output_path: "./output"
max_slides: 30
```
#### `llm/local_llm.py` — LLM 封装
```python
import json
import yaml
from ollama import Client
from typing import Optional
class LocalLLM:
    """Thin wrapper around a local Ollama chat model configured via YAML.

    The constructor reads the ``llm`` section of the settings file and
    creates an ``ollama.Client`` pointed at its ``base_url``.
    """

    def __init__(self, config_path: str = "config/settings.yaml"):
        """Load the ``llm`` settings and create the Ollama client.

        Args:
            config_path: Path to the YAML settings file.
        """
        # Explicit UTF-8: the platform default encoding is not UTF-8
        # everywhere and the settings file may contain non-ASCII text.
        with open(config_path, 'r', encoding='utf-8') as f:
            config = yaml.safe_load(f)
        self.config = config['llm']
        self.client = Client(host=self.config['base_url'])
        self.model = self.config['model']

    def generate(self, prompt: str, system_prompt: str = "",
                 json_mode: bool = False,
                 temperature: Optional[float] = None) -> str:
        """Send one chat request and return the assistant's text.

        Args:
            prompt: User message.
            system_prompt: Optional system message; omitted when empty.
            json_mode: When true, request JSON-formatted output.
            temperature: Sampling temperature; ``None`` uses the configured
                default. An explicit ``0.0`` is honoured.

        Returns:
            The ``content`` field of the response message.
        """
        messages = []
        if system_prompt:
            messages.append({"role": "system", "content": system_prompt})
        messages.append({"role": "user", "content": prompt})

        # ``is None`` rather than ``or``: 0.0 is a valid (deterministic)
        # temperature and must not fall back to the configured default.
        if temperature is None:
            temperature = self.config['temperature']
        options = {
            "temperature": temperature,
            "num_ctx": self.config['num_ctx'],
        }
        response = self.client.chat(
            model=self.model,
            messages=messages,
            format="json" if json_mode else "",
            options=options,
        )
        return response['message']['content']

    def generate_structured(self, prompt: str, system_prompt: str = "") -> dict:
        """Generate in JSON mode and return the parsed result.

        If the raw output is not valid JSON, the span from the first ``{``
        to the last ``}`` is parsed instead.

        Raises:
            ValueError: If no JSON object can be parsed from the output.
        """
        result = self.generate(prompt, system_prompt, json_mode=True)
        try:
            return json.loads(result)
        except json.JSONDecodeError:
            pass

        # Fallback: the model wrapped the object in extra text.
        start, end = result.find('{'), result.rfind('}')
        if start != -1 and end > start:
            try:
                return json.loads(result[start:end + 1])
            except json.JSONDecodeError as err:
                raise ValueError(f"无法解析LLM输出为JSON: {result[:200]}") from err
        raise ValueError(f"无法解析LLM输出为JSON: {result[:200]}")
```
#### `knowledge_base/kb_manager.py` — 知识库管理
```python
import os
import yaml
import chromadb
from chromadb.config import Settings
from langchain_community.document_loaders import (
DirectoryLoader, PyPDFLoader, Docx2txtLoader,
TextLoader, UnstructuredMarkdownLoader
)
from langchain.text_splitter import RecursiveCharacterTextSplitter
from sentence_transformers import SentenceTransformer
from typing import List, Dict
import hashlib
class KnowledgeBaseManager:
    """Local RAG knowledge base backed by ChromaDB and a local embedding model.

    Loads documents from disk, splits them into chunks, embeds the chunks
    and stores them in a persistent Chroma collection that can be searched.
    """

    def __init__(self, config_path: str = "config/settings.yaml"):
        """Read settings and initialise the embedder, vector store and splitter.

        Args:
            config_path: Path to the YAML settings file.
        """
        with open(config_path, 'r', encoding='utf-8') as f:
            config = yaml.safe_load(f)
        self.kb_config = config['knowledge_base']
        self.embed_config = config['embedding']

        # Local embedding model; downloaded on first use, cached afterwards.
        self.embed_model = SentenceTransformer(
            'BAAI/bge-m3',
            cache_folder="./models/embeddings"
        )

        # Persistent on-disk Chroma store with telemetry disabled.
        self.chroma_client = chromadb.PersistentClient(
            path=self.kb_config['vector_store_path'],
            settings=Settings(anonymized_telemetry=False)
        )
        self.collection = self.chroma_client.get_or_create_collection(
            name="ppt_knowledge_base",
            metadata={"hnsw:space": "cosine"}
        )

        # Separators include Chinese and Western sentence punctuation.
        self.text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=self.kb_config['chunk_size'],
            chunk_overlap=self.kb_config['chunk_overlap'],
            separators=["\n\n", "\n", "。", "!", "?", ".", "!", "?", " "]
        )

    def ingest_documents(self, doc_path: str = None):
        """Load, chunk, embed and upsert every supported document in a folder.

        Args:
            doc_path: Directory to scan; defaults to the configured
                ``documents_path``.
        """
        doc_path = doc_path or self.kb_config['documents_path']
        loaders = {
            "*.pdf": PyPDFLoader,
            "*.docx": Docx2txtLoader,
            "*.txt": TextLoader,
            "*.md": UnstructuredMarkdownLoader,
        }

        all_docs = []
        for glob_pattern, loader_cls in loaders.items():
            # Best effort per format: one failing loader must not stop the rest.
            try:
                loader = DirectoryLoader(
                    doc_path, glob=glob_pattern,
                    loader_cls=loader_cls,
                    show_progress=True
                )
                docs = loader.load()
                all_docs.extend(docs)
                print(f" 加载 {glob_pattern}: {len(docs)} 个文件")
            except Exception as e:
                print(f" 加载 {glob_pattern} 出错: {e}")

        chunks = self.text_splitter.split_documents(all_docs)
        print(f"总共 {len(chunks)} 个文本块")

        # Ids are the md5 of the chunk text, so identical text (repeated
        # headers, boilerplate) yields identical ids. Keep only the first
        # occurrence: repeating an id inside one upsert call is rejected.
        unique_chunks = []
        seen_ids = set()
        for chunk in chunks:
            text = chunk.page_content
            if not text or not text.strip():
                continue
            chunk_id = hashlib.md5(text.encode()).hexdigest()
            if chunk_id in seen_ids:
                continue
            seen_ids.add(chunk_id)
            unique_chunks.append((chunk_id, chunk))

        batch_size = 64
        total = len(unique_chunks)
        for i in range(0, total, batch_size):
            batch = unique_chunks[i:i + batch_size]
            texts = [chunk.page_content for _, chunk in batch]
            embeddings = self.embed_model.encode(
                texts, normalize_embeddings=True
            ).tolist()
            self.collection.upsert(
                ids=[chunk_id for chunk_id, _ in batch],
                embeddings=embeddings,
                documents=texts,
                metadatas=[chunk.metadata for _, chunk in batch]
            )
            print(f" 已索引 {min(i + batch_size, total)}/{total}")
        print(f"知识库构建完成,共 {self.collection.count()} 条记录")

    def search(self, query: str, top_k: int = None) -> List[Dict]:
        """Return the chunks most similar to ``query``.

        Args:
            query: Free-text query.
            top_k: Maximum number of results; defaults to the configured
                ``top_k``.

        Returns:
            A list of dicts with ``content``, ``metadata`` and
            ``relevance_score`` (1 - cosine distance). Empty when the
            collection holds no records.
        """
        top_k = top_k or self.kb_config['top_k']

        # Nothing to search, and never ask for more results than exist.
        available = self.collection.count()
        if available == 0:
            return []

        query_embedding = self.embed_model.encode(
            [query], normalize_embeddings=True
        ).tolist()
        results = self.collection.query(
            query_embeddings=query_embedding,
            n_results=min(top_k, available),
            include=["documents", "metadatas", "distances"]
        )

        # Results are nested per query; only one query was sent.
        return [
            {
                "content": doc,
                "metadata": meta,
                "relevance_score": 1 - dist,
            }
            for doc, meta, dist in zip(
                results['documents'][0],
                results['metadatas'][0],
                results['distances'][0],
            )
        ]

    def get_context(self, query: str, top_k: int = None) -> str:
        """Format the search results for ``query`` as one prompt-ready string."""
        context_parts = []
        for i, r in enumerate(self.search(query, top_k), 1):
            source = (r['metadata'] or {}).get('source', '未知来源')
            context_parts.append(
                f"[参考资料 {i}] (来源: {source}, 相关度: {r['relevance_score']:.2f})\n"
                f"{r['content']}"
            )
        return "\n\n---\n\n".join(context_parts)
```
#### `ppt_engine/content_generator.py` — 核心内容生成(多步骤 Pipeline)
```python
import json
from typing import Dict, List, Optional
from llm.local_llm import LocalLLM
from knowledge_base.kb_manager import KnowledgeBaseManager
class PPTContentGenerator:
    """Multi-step pipeline that turns a topic into per-slide content dicts.

    Step 1 asks the LLM for an outline; step 2 asks it for the detailed
    content of each slide. Both prompts include passages retrieved from
    the knowledge base.
    """

    # Example JSON shape shown to the LLM for each layout type. These are
    # plain (non-f) strings, so braces are written once: doubled braces
    # would reach the model literally as "{{ ... }}".
    _LAYOUT_FORMATS = {
        "title_slide": '''{
"title": "主标题",
"subtitle": "副标题",
"author": "作者/团队",
"date": "日期"
}''',
        "bullet_points": '''{
"title": "标题",
"bullets": [
{"main": "主要点", "sub": "补充说明(可选)"},
...
],
"footer_note": "底部备注(可选)"
}''',
        "two_column": '''{
"title": "标题",
"left_column": {
"heading": "左列标题",
"content": ["要点1", "要点2", ...]
},
"right_column": {
"heading": "右列标题",
"content": ["要点1", "要点2", ...]
}
}''',
        "chart": '''{
"title": "标题",
"chart_type": "bar|line|pie|radar",
"chart_data": {
"categories": ["类别1", "类别2", ...],
"series": [
{"name": "系列名", "values": [数值1, 数值2, ...]}
]
},
"insight": "图表核心洞察",
"source": "数据来源"
}''',
        "comparison": '''{
"title": "标题",
"items": [
{"name": "项目A", "features": ["特点1", "特点2", ...]},
{"name": "项目B", "features": ["特点1", "特点2", ...]}
],
"conclusion": "对比结论"
}''',
        "timeline": '''{
"title": "标题",
"events": [
{"time": "时间点", "event": "事件描述", "detail": "详细说明"},
...
]
}''',
        "quote": '''{
"title": "标题",
"quote": "引用内容",
"author": "引用来源",
"commentary": "评论说明"
}''',
        "summary": '''{
"title": "总结标题",
"key_takeaways": ["核心要点1", "核心要点2", ...],
"call_to_action": "行动号召",
"contact_info": "联系方式(可选)"
}''',
    }

    def __init__(self):
        """Create the LLM wrapper and the knowledge base with default settings."""
        self.llm = LocalLLM()
        self.kb = KnowledgeBaseManager()

    def generate_outline(self, topic: str, requirements: str = "",
                         num_slides: int = 15) -> Dict:
        """Step 1: ask the LLM for a sectioned outline of the deck.

        Args:
            topic: Presentation topic, also used as the retrieval query.
            requirements: Optional extra instructions for the model.
            num_slides: Approximate number of slides to request.

        Returns:
            The parsed outline as returned by the LLM.
        """
        context = self.kb.get_context(topic, top_k=10)
        system_prompt = """你是一位专业的PPT策划专家,擅长制作高质量的商业演示文稿。
你需要根据用户的主题和参考资料,生成结构清晰、逻辑严密的PPT大纲。
请以JSON格式输出。"""
        # f-string: literal braces of the JSON example are doubled here.
        prompt = f"""请为以下主题生成一份PPT大纲:
## 主题
{topic}
## 额外要求
{requirements if requirements else "无特殊要求"}
## 参考资料
{context}
## 输出要求
生成约{num_slides}页幻灯片的大纲,JSON格式如下:
{{
"title": "PPT总标题",
"subtitle": "副标题",
"target_audience": "目标受众",
"sections": [
{{
"section_title": "章节标题",
"slides": [
{{
"slide_number": 1,
"title": "幻灯片标题",
"layout_type": "title_slide|content|two_column|chart|image_text|bullet_points|quote|comparison|timeline|summary",
"key_points": ["要点1", "要点2"],
"notes": "演讲者备注/说明"
}}
]
}}
]
}}"""
        return self.llm.generate_structured(prompt, system_prompt)

    def generate_slide_content(self, outline: Dict, slide_info: Dict,
                               section_context: str) -> Dict:
        """Step 2: generate the detailed content of one slide.

        Args:
            outline: The deck outline (its ``title`` is used in the prompt).
            slide_info: Outline entry for this slide.
            section_context: Short description of the enclosing section.

        Returns:
            The slide content dict with ``layout_type`` set.
        """
        title = slide_info.get('title', '')
        key_points = slide_info.get('key_points') or []

        # Retrieve passages specific to this slide rather than to the deck.
        slide_query = f"{title} {' '.join(str(p) for p in key_points)}"
        context = self.kb.get_context(slide_query, top_k=5)

        system_prompt = """你是PPT内容撰写专家。请根据大纲和参考资料,为指定的幻灯片生成详细内容。
内容要求:
1. 文字精炼,每个要点不超过2行
2. 使用具体数据和案例支撑观点
3. 逻辑清晰,层次分明
请以JSON格式输出。"""
        layout_type = slide_info.get('layout_type', 'bullet_points')
        # Layouts without a dedicated format use the bullet format.
        format_template = self._LAYOUT_FORMATS.get(
            layout_type, self._LAYOUT_FORMATS["bullet_points"]
        )
        prompt = f"""## PPT总主题
{outline.get('title', '')}
## 当前章节
{section_context}
## 当前幻灯片信息
- 页码: {slide_info.get('slide_number', '')}
- 标题: {title}
- 布局类型: {layout_type}
- 关键要点: {json.dumps(key_points, ensure_ascii=False)}
## 参考资料
{context}
## 输出格式
请按以下JSON格式输出该页的详细内容:
{format_template}
同时在JSON中添加 "speaker_notes": "演讲者备注" 字段。"""
        content = self.llm.generate_structured(prompt, system_prompt)
        if not isinstance(content, dict):
            # The model returned a bare list or scalar; keep it as bullets.
            content = {
                "title": title,
                "bullets": content if isinstance(content, list) else [],
            }
        content['layout_type'] = layout_type
        return content

    def generate_full_ppt_content(self, topic: str, requirements: str = "",
                                  num_slides: int = 15) -> Dict:
        """Run the full pipeline: outline first, then every slide.

        Returns:
            A dict with ``metadata``, the raw ``outline`` and the list of
            generated ``slides``.

        Raises:
            ValueError: If the outline returned by the LLM is not a JSON object.
        """
        print("📋 第1步:生成PPT大纲...")
        outline = self.generate_outline(topic, requirements, num_slides)
        if not isinstance(outline, dict):
            raise ValueError("LLM outline is not a JSON object")
        # LLM output is not guaranteed to contain every key.
        sections = [s for s in (outline.get('sections') or []) if isinstance(s, dict)]
        print(f" ✅ 大纲完成:{outline.get('title', topic)}")
        print(f" 📊 共 {sum(len(s.get('slides') or []) for s in sections)} 页幻灯片")

        print("\n📝 第2步:生成每页详细内容...")
        all_slides = []
        for section in sections:
            section_title = section.get('section_title', '')
            section_context = f"章节:{section_title}"
            for slide_info in section.get('slides') or []:
                # Fall back to the running position when the model omitted
                # the slide number.
                slide_number = slide_info.get('slide_number', len(all_slides) + 1)
                print(f" 🔄 生成第 {slide_number} 页: {slide_info.get('title', '')}")
                slide_content = self.generate_slide_content(
                    outline, slide_info, section_context
                )
                slide_content['slide_number'] = slide_number
                slide_content['section'] = section_title
                all_slides.append(slide_content)

        print("\n✨ 第3步:内容优化和一致性检查...")
        # Optional: have the LLM review overall consistency here.
        return {
            "metadata": {
                "title": outline.get('title', topic),
                "subtitle": outline.get('subtitle', ''),
                "target_audience": outline.get('target_audience', ''),
                "total_slides": len(all_slides),
            },
            "outline": outline,
            "slides": all_slides,
        }
```
#### `ppt_engine/ppt_builder.py` — PPT 文件构建
```python
import os
import json
from pptx import Presentation
from pptx.util import Inches, Pt, Emu
from pptx.dml.color import RGBColor
from pptx.enum.text import PP_ALIGN, MSO_ANCHOR
from pptx.enum.chart import XL_CHART_TYPE
from pptx.chart.data import CategoryChartData
from typing import Dict, List, Optional
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
from io import BytesIO
class PPTBuilder:
    """Build a .pptx deck from generated slide dictionaries with python-pptx.

    Each ``build_*`` method appends one slide drawn from shapes and text
    boxes; ``build_ppt`` drives them from the content generator's output.
    """

    # Colour palettes keyed by scheme name.
    COLOR_SCHEMES = {
        "business_blue": {
            "primary": RGBColor(0x00, 0x52, 0x9B),
            "secondary": RGBColor(0x00, 0x96, 0xD6),
            "accent": RGBColor(0xFF, 0x8C, 0x00),
            "text_dark": RGBColor(0x33, 0x33, 0x33),
            "text_light": RGBColor(0xFF, 0xFF, 0xFF),
            "bg_light": RGBColor(0xF5, 0xF7, 0xFA),
            "bg_dark": RGBColor(0x00, 0x3D, 0x73),
        },
        "tech_dark": {
            "primary": RGBColor(0x1A, 0x1A, 0x2E),
            "secondary": RGBColor(0x16, 0x21, 0x3E),
            "accent": RGBColor(0x0F, 0xCE, 0xDD),
            "text_dark": RGBColor(0xE0, 0xE0, 0xE0),
            "text_light": RGBColor(0xFF, 0xFF, 0xFF),
            "bg_light": RGBColor(0x1A, 0x1A, 0x2E),
            "bg_dark": RGBColor(0x0F, 0x0F, 0x1A),
        },
        "minimal_white": {
            "primary": RGBColor(0x2D, 0x2D, 0x2D),
            "secondary": RGBColor(0x75, 0x75, 0x75),
            "accent": RGBColor(0xE8, 0x4D, 0x39),
            "text_dark": RGBColor(0x2D, 0x2D, 0x2D),
            "text_light": RGBColor(0xFF, 0xFF, 0xFF),
            "bg_light": RGBColor(0xFF, 0xFF, 0xFF),
            "bg_dark": RGBColor(0x2D, 0x2D, 0x2D),
        },
    }

    def __init__(self, template_path: str = None, color_scheme: str = "business_blue"):
        """Open the template, or create an empty 16:9 deck when none is given.

        Args:
            template_path: Optional .pptx used as the base presentation.
            color_scheme: Key into ``COLOR_SCHEMES``; unknown names fall
                back to ``business_blue``.
        """
        if template_path and os.path.exists(template_path):
            self.prs = Presentation(template_path)
        else:
            self.prs = Presentation()
            # 16:9 aspect ratio.
            self.prs.slide_width = Inches(13.333)
            self.prs.slide_height = Inches(7.5)
        self.colors = self.COLOR_SCHEMES.get(color_scheme, self.COLOR_SCHEMES["business_blue"])
        self.slide_width = self.prs.slide_width
        self.slide_height = self.prs.slide_height

    # ------------------------------------------------------------------
    # Low-level helpers
    # ------------------------------------------------------------------
    def _new_slide(self, bg_color: RGBColor = None):
        """Append a slide on the blank layout and paint its background."""
        layouts = self.prs.slide_layouts
        # Index 6 is the blank layout this builder was written against; a
        # custom template may define fewer layouts, so fall back to the last.
        layout = layouts[6] if len(layouts) > 6 else layouts[len(layouts) - 1]
        slide = self.prs.slides.add_slide(layout)
        self._add_background(slide, bg_color)
        return slide

    def _add_background(self, slide, color: RGBColor = None):
        """Fill the slide background with ``color`` (default: ``bg_light``)."""
        fill = slide.background.fill
        fill.solid()
        fill.fore_color.rgb = color or self.colors['bg_light']

    def _style_paragraph(self, paragraph, text, size: int, color: RGBColor,
                         bold: bool = None, alignment=None,
                         font_name: str = "微软雅黑"):
        """Set the text and font attributes of a single paragraph."""
        # LLM output may contain numbers or null where text is expected.
        paragraph.text = "" if text is None else str(text)
        font = paragraph.font
        font.size = Pt(size)
        font.color.rgb = color
        font.bold = bold
        font.name = font_name
        if alignment is not None:
            paragraph.alignment = alignment

    def _add_text_box(self, slide, left, top, width, height,
                      text: str, font_size: int = 18,
                      color: RGBColor = None, bold: bool = False,
                      alignment: PP_ALIGN = PP_ALIGN.LEFT,
                      font_name: str = "微软雅黑"):
        """Add a word-wrapped text box with one styled paragraph and return it."""
        text_box = slide.shapes.add_textbox(left, top, width, height)
        frame = text_box.text_frame
        frame.word_wrap = True
        self._style_paragraph(
            frame.paragraphs[0], text, font_size,
            color or self.colors['text_dark'],
            bold=bold, alignment=alignment, font_name=font_name,
        )
        return text_box

    def _add_filled_shape(self, slide, shape_type, left, top, width, height,
                          fill_color: RGBColor):
        """Add a borderless auto-shape with a solid fill and return it."""
        shape = slide.shapes.add_shape(shape_type, left, top, width, height)
        shape.fill.solid()
        shape.fill.fore_color.rgb = fill_color
        shape.line.fill.background()
        return shape

    def _add_shape_with_text(self, slide, shape_type, left, top, width, height,
                             text: str, fill_color: RGBColor = None,
                             font_size: int = 14, font_color: RGBColor = None):
        """Add a filled shape carrying centred text and return it."""
        shape = self._add_filled_shape(
            slide, shape_type, left, top, width, height,
            fill_color or self.colors['primary'],
        )
        frame = shape.text_frame
        frame.word_wrap = True
        self._style_paragraph(
            frame.paragraphs[0], text, font_size,
            font_color or self.colors['text_light'],
            alignment=PP_ALIGN.CENTER,
        )
        return shape

    def _add_header(self, slide, title):
        """Draw the coloured title bar shared by the content layouts."""
        from pptx.enum.shapes import MSO_SHAPE
        self._add_filled_shape(
            slide, MSO_SHAPE.RECTANGLE,
            Inches(0), Inches(0), self.slide_width, Inches(1.2),
            self.colors['primary'],
        )
        self._add_text_box(
            slide, Inches(0.8), Inches(0.2), Inches(11), Inches(0.8),
            title, font_size=32,
            color=self.colors['text_light'], bold=True
        )

    @staticmethod
    def _as_list(value) -> List:
        """Coerce ``None`` / scalar / sequence into a list."""
        if value is None:
            return []
        if isinstance(value, (list, tuple)):
            return list(value)
        return [value]

    @staticmethod
    def _to_number(value) -> float:
        """Best-effort numeric conversion for chart values; 0 when impossible."""
        if isinstance(value, (int, float)) and not isinstance(value, bool):
            return value
        try:
            return float(str(value).replace(',', '').strip().rstrip('%'))
        except ValueError:
            return 0

    # ------------------------------------------------------------------
    # Slide builders
    # ------------------------------------------------------------------
    def build_title_slide(self, slide_data: Dict):
        """Build the cover slide (title, subtitle, optional author/date line)."""
        from pptx.enum.shapes import MSO_SHAPE
        slide = self._new_slide(self.colors['bg_dark'])

        # Decorative accent line between title and subtitle.
        self._add_filled_shape(
            slide, MSO_SHAPE.RECTANGLE,
            Inches(1), Inches(3.2), Inches(2), Inches(0.05),
            self.colors['accent'],
        )
        self._add_text_box(
            slide, Inches(1), Inches(1.5), Inches(11), Inches(1.5),
            slide_data.get('title', ''), font_size=44,
            color=self.colors['text_light'], bold=True,
            alignment=PP_ALIGN.LEFT
        )
        self._add_text_box(
            slide, Inches(1), Inches(3.5), Inches(8), Inches(1),
            slide_data.get('subtitle', ''), font_size=22,
            color=self.colors['secondary'],
            alignment=PP_ALIGN.LEFT
        )

        # Join only the parts that exist, so a missing author or date
        # does not leave a dangling " | ".
        parts = [str(slide_data.get(key) or '').strip() for key in ('author', 'date')]
        byline = " | ".join(part for part in parts if part)
        if byline:
            self._add_text_box(
                slide, Inches(1), Inches(5.5), Inches(6), Inches(0.5),
                byline, font_size=14,
                color=self.colors['text_light'],
                alignment=PP_ALIGN.LEFT
            )

    def build_bullet_slide(self, slide_data: Dict):
        """Build a numbered bullet-list slide from ``slide_data['bullets']``."""
        from pptx.enum.shapes import MSO_SHAPE
        slide = self._new_slide()
        self._add_header(slide, slide_data.get('title', ''))

        bullets = self._as_list(slide_data.get('bullets'))
        y_start = Inches(1.8)
        # One inch per bullet by default; compress the spacing when the
        # list would otherwise run past the bottom of the slide.
        available = self.slide_height - y_start - Inches(0.3)
        step = min(Inches(1.0), available // len(bullets)) if bullets else Inches(1.0)
        dense = step < Inches(0.85)
        main_size, sub_size = (16, 11) if dense else (20, 14)

        for i, bullet in enumerate(bullets):
            if isinstance(bullet, dict):
                main_text = bullet.get('main', '')
                sub_text = bullet.get('sub', '')
            else:
                main_text = str(bullet)
                sub_text = ''
            top = y_start + i * step

            # Numbered circle in front of the bullet.
            circle = self._add_filled_shape(
                slide, MSO_SHAPE.OVAL,
                Inches(0.8), top + Inches(0.05), Inches(0.4), Inches(0.4),
                self.colors['accent'],
            )
            self._style_paragraph(
                circle.text_frame.paragraphs[0], str(i + 1), 14,
                self.colors['text_light'], bold=True,
                alignment=PP_ALIGN.CENTER,
            )

            self._add_text_box(
                slide, Inches(1.5), top, Inches(10), Inches(0.5),
                main_text, font_size=main_size, bold=True,
                color=self.colors['text_dark']
            )
            if sub_text:
                self._add_text_box(
                    slide, Inches(1.5), top + int(step * 0.45),
                    Inches(10), Inches(0.4),
                    sub_text, font_size=sub_size,
                    color=self.colors['secondary']
                )

    def build_two_column_slide(self, slide_data: Dict):
        """Build a slide with two bordered columns (heading plus bullet items)."""
        from pptx.enum.shapes import MSO_SHAPE
        slide = self._new_slide()
        self._add_header(slide, slide_data.get('title', ''))

        # (column data, box x, heading x, item x, accent colour)
        columns = (
            (slide_data.get('left_column'), 0.5, 0.8, 1.0, self.colors['primary']),
            (slide_data.get('right_column'), 6.8, 7.1, 7.3, self.colors['secondary']),
        )
        for column, box_x, heading_x, item_x, accent in columns:
            if not isinstance(column, dict):
                column = {}
            box = slide.shapes.add_shape(
                MSO_SHAPE.ROUNDED_RECTANGLE,
                Inches(box_x), Inches(1.6), Inches(5.8), Inches(5.2)
            )
            box.fill.solid()
            box.fill.fore_color.rgb = RGBColor(0xFF, 0xFF, 0xFF)
            box.line.color.rgb = accent
            box.line.width = Pt(2)

            self._add_text_box(
                slide, Inches(heading_x), Inches(1.8), Inches(5.2), Inches(0.6),
                column.get('heading', ''), font_size=22, bold=True,
                color=accent
            )
            items = self._as_list(column.get('content'))
            # 0.6in per item by default; compress so long lists stay in the box.
            step = min(0.6, 4.0 / len(items)) if items else 0.6
            for j, item in enumerate(items):
                self._add_text_box(
                    slide, Inches(item_x), Inches(2.6 + j * step),
                    Inches(5), Inches(0.5),
                    f"• {item}", font_size=16, color=self.colors['text_dark']
                )

    def build_chart_slide(self, slide_data: Dict):
        """Build a slide with a native chart plus optional insight and source."""
        from pptx.enum.shapes import MSO_SHAPE
        slide = self._new_slide()
        self._add_header(slide, slide_data.get('title', ''))

        chart_info = slide_data.get('chart_data')
        if not isinstance(chart_info, dict):
            chart_info = {}
        chart_type_map = {
            'bar': XL_CHART_TYPE.COLUMN_CLUSTERED,
            'line': XL_CHART_TYPE.LINE_MARKERS,
            'pie': XL_CHART_TYPE.PIE,
            # The content prompt offers "radar" as a chart type.
            'radar': XL_CHART_TYPE.RADAR,
        }
        chart_type = chart_type_map.get(
            str(slide_data.get('chart_type', 'bar')),
            XL_CHART_TYPE.COLUMN_CLUSTERED,
        )

        categories = self._as_list(chart_info.get('categories')) or ['A', 'B', 'C']
        series_list = [
            s for s in self._as_list(chart_info.get('series')) if isinstance(s, dict)
        ] or [{'name': '数据', 'values': [1, 2, 3]}]

        chart_data = CategoryChartData()
        chart_data.categories = [str(c) for c in categories]
        for series in series_list:
            # Work on a copy: pad/trim to the category count and coerce
            # to numbers without mutating the caller's content dict.
            values = [self._to_number(v) for v in self._as_list(series.get('values'))]
            values = (values + [0] * len(categories))[:len(categories)]
            chart_data.add_series(str(series.get('name', '系列')), values)

        chart = slide.shapes.add_chart(
            chart_type, Inches(0.8), Inches(1.5), Inches(8), Inches(5),
            chart_data
        ).chart
        chart.has_legend = True
        chart.legend.include_in_layout = False

        insight = slide_data.get('insight', '')
        if insight:
            insight_box = self._add_filled_shape(
                slide, MSO_SHAPE.ROUNDED_RECTANGLE,
                Inches(9.2), Inches(1.5), Inches(3.8), Inches(3),
                self.colors['accent'],
            )
            frame = insight_box.text_frame
            frame.word_wrap = True
            self._style_paragraph(
                frame.paragraphs[0], "💡 核心洞察", 16,
                self.colors['text_light'], bold=True,
            )
            self._style_paragraph(
                frame.add_paragraph(), insight, 13, self.colors['text_light'],
            )

        source = slide_data.get('source', '')
        if source:
            self._add_text_box(
                slide, Inches(0.8), Inches(6.8), Inches(8), Inches(0.4),
                f"数据来源: {source}", font_size=10,
                color=self.colors['secondary']
            )

    def build_summary_slide(self, slide_data: Dict):
        """Build the closing slide: takeaway cards and an optional call to action."""
        from pptx.enum.shapes import MSO_SHAPE
        slide = self._new_slide(self.colors['bg_dark'])

        self._add_text_box(
            slide, Inches(1), Inches(0.5), Inches(11), Inches(1),
            slide_data.get('title', '总结'), font_size=40,
            color=self.colors['text_light'], bold=True,
            alignment=PP_ALIGN.CENTER
        )
        self._add_filled_shape(
            slide, MSO_SHAPE.RECTANGLE,
            Inches(5.5), Inches(1.5), Inches(2.3), Inches(0.05),
            self.colors['accent'],
        )

        # Cards laid out three per row.
        card_width = Inches(3.5)
        cards_per_row = 3
        for i, point in enumerate(self._as_list(slide_data.get('key_takeaways'))):
            row, col = divmod(i, cards_per_row)
            x = Inches(0.8) + col * (card_width + Inches(0.5))
            y = Inches(2.2) + row * Inches(2.0)
            card = self._add_filled_shape(
                slide, MSO_SHAPE.ROUNDED_RECTANGLE,
                x, y, card_width, Inches(1.5),
                self.colors['primary'],
            )
            frame = card.text_frame
            frame.word_wrap = True
            # Two-digit ordinal: "01" ... "09", "10" (not "010").
            self._style_paragraph(
                frame.paragraphs[0], f"{i + 1:02d}", 24,
                self.colors['accent'], bold=True,
                alignment=PP_ALIGN.CENTER,
            )
            self._style_paragraph(
                frame.add_paragraph(), point, 14,
                self.colors['text_light'], alignment=PP_ALIGN.CENTER,
            )

        cta = slide_data.get('call_to_action', '')
        if cta:
            self._add_text_box(
                slide, Inches(1), Inches(6.2), Inches(11), Inches(0.8),
                cta, font_size=20,
                color=self.colors['accent'], bold=True,
                alignment=PP_ALIGN.CENTER
            )

    def build_section_divider(self, section_title: str, section_number: int):
        """Build a divider slide showing the section number and title."""
        from pptx.enum.shapes import MSO_SHAPE
        slide = self._new_slide(self.colors['primary'])

        self._add_text_box(
            slide, Inches(1), Inches(1), Inches(3), Inches(2.5),
            f"{section_number:02d}", font_size=96,
            color=self.colors['accent'], bold=True
        )
        self._add_filled_shape(
            slide, MSO_SHAPE.RECTANGLE,
            Inches(1), Inches(3.8), Inches(3), Inches(0.05),
            self.colors['text_light'],
        )
        self._add_text_box(
            slide, Inches(1), Inches(4.2), Inches(11), Inches(1.5),
            section_title, font_size=40,
            color=self.colors['text_light'], bold=True
        )

    # ------------------------------------------------------------------
    # Deck assembly
    # ------------------------------------------------------------------
    def _resolve_builder(self, slide_data: Dict):
        """Pick the builder for a slide and adapt its data to that builder.

        The timeline, quote and comparison layouts have no dedicated
        builder; their fields are mapped onto the keys the bullet and
        two-column builders read, so the slide is not rendered empty.

        Returns:
            ``(builder, data)`` where ``data`` is a shallow copy.
        """
        data = dict(slide_data)
        layout = data.get('layout_type', 'bullet_points')
        direct = {
            'two_column': self.build_two_column_slide,
            'chart': self.build_chart_slide,
            'summary': self.build_summary_slide,
        }
        if layout in direct:
            return direct[layout], data

        if layout == 'comparison' and not data.get('left_column'):
            items = [i for i in self._as_list(data.get('items')) if isinstance(i, dict)]
            if len(items) == 2:
                for key, item in zip(('left_column', 'right_column'), items):
                    data[key] = {
                        'heading': item.get('name', ''),
                        'content': self._as_list(item.get('features')),
                    }
                return self.build_two_column_slide, data
            # Any other item count does not fit two columns: list them.
            data['bullets'] = [
                {'main': item.get('name', ''),
                 'sub': ';'.join(str(f) for f in self._as_list(item.get('features')))}
                for item in items
            ]
        elif layout == 'comparison':
            return self.build_two_column_slide, data
        elif layout == 'timeline' and not data.get('bullets'):
            data['bullets'] = [
                {'main': f"{e.get('time', '')} {e.get('event', '')}".strip(),
                 'sub': e.get('detail', '')}
                for e in self._as_list(data.get('events')) if isinstance(e, dict)
            ]
        elif layout == 'quote' and not data.get('bullets'):
            author = data.get('author', '')
            bullets = [{'main': f"“{data.get('quote', '')}”",
                        'sub': f"—— {author}" if author else ''}]
            if data.get('commentary'):
                bullets.append({'main': data['commentary'], 'sub': ''})
            data['bullets'] = bullets

        if not data.get('bullets'):
            # Last resort: reuse any list-like field the model did return.
            for key in ('key_points', 'content', 'key_takeaways'):
                if data.get(key):
                    data['bullets'] = self._as_list(data[key])
                    break
        return self.build_bullet_slide, data

    def build_ppt(self, content: Dict, output_path: str):
        """Build the complete deck from generated content and save it.

        Args:
            content: Dict with ``metadata`` and ``slides`` as produced by
                the content generator.
            output_path: Destination .pptx path; parent folders are created.
        """
        metadata = content.get('metadata', {})
        slides = [s for s in content.get('slides', []) if isinstance(s, dict)]

        # 1. Cover slide: metadata first, then any non-empty fields of a
        # generated title slide (empty values must not erase the title).
        title_data = {
            'title': metadata.get('title', ''),
            'subtitle': metadata.get('subtitle', ''),
            'author': '',
            'date': '',
        }
        first_is_title = bool(slides) and slides[0].get('layout_type') == 'title_slide'
        if first_is_title:
            title_data.update({k: v for k, v in slides[0].items() if v})
        self.build_title_slide(title_data)

        # 2. Content slides, with a divider whenever the section changes.
        current_section = None
        section_number = 0
        for index, slide_data in enumerate(slides):
            if index == 0 and first_is_title:
                continue  # already rendered as the cover

            section = slide_data.get('section', '')
            if section and section != current_section:
                current_section = section
                section_number += 1
                self.build_section_divider(section, section_number)

            builder, data = self._resolve_builder(slide_data)
            try:
                builder(data)
            except Exception as e:
                print(f" ⚠️ 构建第 {slide_data.get('slide_number', '?')} 页时出错: {e}")
                # Degrade to the basic bullet layout; if that fails too,
                # report it and continue so the rest of the deck is saved.
                try:
                    self.build_bullet_slide(slide_data)
                except Exception as fallback_error:
                    print(f" ⚠️ 第 {slide_data.get('slide_number', '?')} 页降级构建也失败,已跳过: {fallback_error}")

        os.makedirs(os.path.dirname(output_path) or '.', exist_ok=True)
        self.prs.save(output_path)
        print(f"\n✅ PPT已保存到: {output_path}")
```
#### `app.py` — 主程序入口
```python
import os
import json
import argparse
from datetime import datetime
from ppt_engine.content_generator import PPTContentGenerator
from ppt_engine.ppt_builder import PPTBuilder
from knowledge_base.kb_manager import KnowledgeBaseManager
def _safe_filename(text: str, fallback: str = "presentation", max_len: int = 80) -> str:
    """Turn free text into a file-name component without path separators."""
    import re
    cleaned = re.sub(r'[\\/:*?"<>|\x00-\x1f]+', '_', text).strip(' ._')
    return cleaned[:max_len] or fallback


def main(argv=None):
    """Command-line entry point: manage the knowledge base or generate a deck.

    Args:
        argv: Argument list to parse; ``None`` uses ``sys.argv[1:]``.
    """
    parser = argparse.ArgumentParser(description='离线AI PPT生成器')
    subparsers = parser.add_subparsers(dest='command')

    # Knowledge-base management.
    kb_parser = subparsers.add_parser('kb', help='知识库管理')
    kb_parser.add_argument('action', choices=['ingest', 'search', 'info'])
    kb_parser.add_argument('--path', type=str, help='文档路径')
    kb_parser.add_argument('--query', type=str, help='搜索查询')

    # Deck generation.
    gen_parser = subparsers.add_parser('generate', help='生成PPT')
    gen_parser.add_argument('--topic', type=str, required=True, help='PPT主题')
    gen_parser.add_argument('--requirements', type=str, default='', help='额外要求')
    gen_parser.add_argument('--slides', type=int, default=15, help='幻灯片数量')
    gen_parser.add_argument('--template', type=str, default='business_blue',
                            choices=['business_blue', 'tech_dark', 'minimal_white'])
    gen_parser.add_argument('--output', type=str, help='输出文件路径')

    args = parser.parse_args(argv)

    if args.command == 'kb':
        kb = KnowledgeBaseManager()
        if args.action == 'ingest':
            path = args.path or None
            print("📚 开始导入文档到知识库...")
            kb.ingest_documents(path)
        elif args.action == 'search':
            if not args.query:
                print("请提供搜索查询 --query")
                return
            results = kb.search(args.query)
            for r in results:
                print(f"\n[相关度: {r['relevance_score']:.2f}]")
                print(f"来源: {r['metadata'].get('source', '未知')}")
                print(f"内容: {r['content'][:200]}...")
        elif args.action == 'info':
            print(f"知识库文档数: {kb.collection.count()}")
    elif args.command == 'generate':
        print("🚀 开始生成PPT...")
        print(f" 主题: {args.topic}")
        print(f" 页数: ~{args.slides}")
        print(f" 模板: {args.template}")
        print()

        # 1. Generate the content.
        generator = PPTContentGenerator()
        content = generator.generate_full_ppt_content(
            topic=args.topic,
            requirements=args.requirements,
            num_slides=args.slides
        )

        # Keep the intermediate JSON for debugging and manual editing.
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        json_path = f"output/content_{timestamp}.json"
        os.makedirs("output", exist_ok=True)
        with open(json_path, 'w', encoding='utf-8') as f:
            json.dump(content, f, ensure_ascii=False, indent=2)
        print(f"\n📄 内容JSON已保存: {json_path}")

        # 2. Build the deck. The topic is free text, so strip characters
        # that are path separators or invalid in file names.
        output_path = args.output or f"output/{_safe_filename(args.topic)}_{timestamp}.pptx"
        builder = PPTBuilder(color_scheme=args.template)
        builder.build_ppt(content, output_path)
        print(f"\n🎉 PPT生成完成!")
        print(f" 文件: {output_path}")
        print(f" 页数: {content['metadata']['total_slides']}")
    else:
        parser.print_help()


if __name__ == '__main__':
    main()
```
#### `web_ui.py` — Gradio Web 界面
```python
import gradio as gr
import json
import os
from datetime import datetime
from ppt_engine.content_generator import PPTContentGenerator
from ppt_engine.ppt_builder import PPTBuilder
from knowledge_base.kb_manager import KnowledgeBaseManager
# One generator for the whole app. Reuse its knowledge base instead of
# creating a second KnowledgeBaseManager, which would load the embedding
# model twice and open a second Chroma client on the same store.
generator = PPTContentGenerator()
kb = generator.kb
def upload_documents(files):
    """Move uploaded files into the documents folder and re-index it.

    Args:
        files: Uploaded files as passed by the UI; each item is either a
            path string or an object exposing the path as ``.name``.

    Returns:
        A status message for display.
    """
    if not files:
        return "请选择文件"

    import shutil

    upload_dir = "knowledge_base/documents"
    os.makedirs(upload_dir, exist_ok=True)
    for file in files:
        src = getattr(file, "name", file)
        dest = os.path.join(upload_dir, os.path.basename(src))
        # shutil.move instead of os.rename: the upload temp directory may
        # be on a different filesystem, where a plain rename fails.
        shutil.move(src, dest)
    kb.ingest_documents(upload_dir)
    return f"✅ 已导入 {len(files)} 个文档到知识库,当前共 {kb.collection.count()} 条记录"
def generate_ppt(topic, requirements, num_slides, template, progress=gr.Progress()):
    """Generate a deck for the UI and return ``(file_path, status_text)``.

    Args:
        topic: Presentation topic (required).
        requirements: Optional extra instructions.
        num_slides: Requested slide count.
        template: Colour scheme name passed to ``PPTBuilder``.
        progress: Gradio progress reporter.
    """
    if not topic or not topic.strip():
        return None, "请输入PPT主题"

    import re

    progress(0.1, desc="生成大纲中...")
    content = generator.generate_full_ppt_content(
        topic=topic,
        requirements=requirements,
        num_slides=int(num_slides)
    )

    progress(0.7, desc="构建PPT文件...")
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    # The topic is free text from the browser: strip path separators and
    # characters that are invalid in file names before building the path.
    safe_topic = re.sub(r'[\\/:*?"<>|\x00-\x1f]+', '_', topic).strip(' ._')[:80] or "presentation"
    output_path = f"output/{safe_topic}_{timestamp}.pptx"
    os.makedirs("output", exist_ok=True)
    builder = PPTBuilder(color_scheme=template)
    builder.build_ppt(content, output_path)

    progress(1.0, desc="完成!")
    outline_text = json.dumps(content['outline'], ensure_ascii=False, indent=2)
    return output_path, f"✅ 生成完成!共 {content['metadata']['total_slides']} 页\n\n大纲预览:\n{outline_text[:2000]}"
# 构建界面
with gr.Blocks(title="离线AI PPT生成器", theme=gr.themes.Soft()) as demo:
gr.Markdown("# 🎯 离线AI PPT生成器")
gr.Markdown("基于本地LLM + RAG知识库,完全离线生成高质量PPT")
with gr.Tab("📊 生成PPT"):
with gr.Row():
with gr.Column(scale=1):
topic_input = gr.Textbox(
label="PPT主题",
placeholder="例如:2024年度AI技术发展趋势分析报告",
lines=2
)
requirements_input = gr.Textbox(
label="额外要求(可选)",
placeholder="例如:面向技术管理层,重点关注大模型和Agent方向",
lines=3
)
with gr.Row():
slides_input = gr.Slider(
minimum=5, maximum=30, value=15, step=1,
label="幻灯片页数"
)
template_input = gr.Dropdown(
choices=["business_blue", "tech_dark", "minimal_white"],
value="business_blue",
label="配色方案"
)
generate_btn = gr.Button("🚀 开始生成", variant="primary", size="lg")
with gr.Column(scale=1):
output_file = gr.File(label="下载PPT")
output_info = gr.Textbox(label="生成信息", lines=15)
generate_btn.click(
fn=generate_ppt,
inputs=[topic_input, requirements_input, slides_input, template_input],
outputs=[output_file, output_info]
)
with gr.Tab("📚 知识库管理"):
gr.Markdown("上传文档到知识库,支持 PDF、Word、TXT、Markdown 格式")
file_upload = gr.File(
label="选择文档",
file_count="multiple",
file_types=[".pdf", ".docx", ".txt", ".md"]
)
upload_btn = gr.Button("📥 导入知识库")
upload_result = gr.Textbox(label="导入结果")
upload_btn.click(
fn=upload_documents,
inputs=[file_upload],
outputs=[upload_result]
)
gr.Markdown("### 🔍 知识库检索测试")
search_input = gr.Textbox(label="搜索查询")
search_btn = gr.Button("搜索")
search_result = gr.Textbox(label="搜索结果", lines=10)
def search_kb(query):
    """Run a retrieval test against the knowledge base and format the hits.

    Args:
        query: Free-text query typed into the search box.

    Returns:
        A newline-joined listing of up to five hits (relevance score,
        source and a content preview of at most 150 characters), or a short
        notice when the query is blank or nothing matched.
    """
    if not query or not query.strip():
        return "请输入查询"
    results = kb.search(query, top_k=5)
    if not results:
        # Without this the result box would silently stay empty.
        return "未找到相关结果"
    output = []
    for i, r in enumerate(results, 1):
        metadata = r['metadata'] or {}
        preview = r['content']
        if len(preview) > 150:
            # Only mark the preview as truncated when it really was.
            preview = preview[:150] + "..."
        output.append(f"[{i}] 相关度: {r['relevance_score']:.2f}")
        output.append(f" 来源: {metadata.get('source', '未知')}")
        output.append(f" 内容: {preview}")
        output.append("")
    return "\n".join(output)
# Wire the search button: the query text goes to search_kb and its formatted
# string is shown in the results textbox.
search_btn.click(fn=search_kb, inputs=[search_input], outputs=[search_result])
# Start the web server only when run as a script. server_name="0.0.0.0"
# listens on every network interface (not just localhost) on port 7860;
# share=False keeps Gradio from creating a public share link.
if __name__ == '__main__':
demo.launch(server_name="0.0.0.0", server_port=7860, share=False)
```
---
## 三、使用流程
### 1. 环境搭建
```bash
# 1. 安装Ollama并下载模型
curl -fsSL https://ollama.com/install.sh | sh
ollama pull qwen2.5:32b-instruct-q4_K_M
ollama pull bge-m3
# 2. 创建项目环境
python -m venv venv
source venv/bin/activate # Linux/Mac
pip install -r requirements.txt
# 3. 下载嵌入模型(首次运行会自动下载,之后离线可用)
python -c "from sentence_transformers import SentenceTransformer; SentenceTransformer('BAAI/bge-m3', cache_folder='./models/embeddings')"
```
### 2. 构建知识库
```bash
# 将相关文档放入 knowledge_base/documents/ 目录
cp ~/my_reports/*.pdf knowledge_base/documents/
cp ~/my_docs/*.docx knowledge_base/documents/
# 导入知识库
python app.py kb ingest
# 测试检索
python app.py kb search --query "人工智能发展趋势"
```
### 3. 生成PPT
```bash
# 命令行方式
python app.py generate \
--topic "2024年人工智能技术发展趋势与企业应用策略" \
--requirements "面向CTO级别决策者,重点分析大模型、Agent和多模态方向,包含市场数据" \
--slides 20 \
--template business_blue
# Web界面方式
python web_ui.py
# 打开浏览器访问 http://localhost:7860
```
---
## 四、提升PPT质量的关键技巧
### 1. Prompt Engineering 优化
```python
# In content_generator.py, use a more fine-grained system prompt.
# The text below is sent to the model verbatim, so it is kept in Chinese.
QUALITY_SYSTEM_PROMPT = """你是世界顶级的PPT设计咨询师,曾服务于麦肯锡、BCG等顶级咨询公司。
你的PPT内容遵循以下原则:
1. **金字塔原理**: 先结论后论据,每页一个核心观点
2. **MECE原则**: 分类完全穷尽、相互独立
3. **数据驱动**: 每个观点都有数据或案例支撑
4. **视觉思维**: 用对比、流程、矩阵等结构化方式呈现
5. **精炼表达**: 标题不超过10个字,要点不超过2行
你生成的每一页都应该能独立传达一个完整的信息。"""
```
### 2. 模板库扩展
```python
# 创建专业级PPT模板的关键要素
"""
templates/
├── business_blue.pptx # 商务蓝 - 适合正式汇报
├── tech_dark.pptx # 科技暗色 - 适合技术分享
├── minimal_white.pptx # 极简白 - 适合学术/创意
├── consulting_red.pptx # 咨询红 - 适合战略分析
└── data_green.pptx # 数据绿 - 适合数据报告
每个模板应包含:
- 预设的母版和布局
- 统一的字体方案
- 配色方案
- 占位符位置
- 页脚/页码样式
"""
```
### 3. 多轮优化 Pipeline
```python
class QualityOptimizer:
"""Quality optimizer for generated PPT content.

Illustrative sketch: of the four passes called by optimize_content, only
_refine_text is outlined here; the other helpers are not shown in this
snippet.
"""
def optimize_content(self, content: Dict) -> Dict:
"""Run the content through four optimization passes in order.

Each pass receives the previous pass's return value and the final
result is returned to the caller.
"""
# Pass 1: logical consistency check
content = self._check_logic_consistency(content)
# Pass 2: data accuracy verification (against the knowledge base)
content = self._verify_data_accuracy(content)
# Pass 3: text refinement
content = self._refine_text(content)
# Pass 4: visual layout optimization suggestions
content = self._optimize_layout(content)
return content
def _refine_text(self, content: Dict) -> Dict:
"""Ask the LLM to tighten the wording of each slide."""
# Prompt template with a {content} placeholder for the slide content.
prompt = """请优化以下PPT页面的文字:
规则:
- 标题控制在8-12个字
- 每个要点不超过20个字
- 删除冗余修饰词
- 使用动词开头的短句
- 数字用阿拉伯数字
当前内容:
{content}
请返回优化后的JSON。"""
# ... implementation omitted
# NOTE(review): as written nothing is returned, so optimize_content would
# pass None to the next pass; the omitted code presumably returns the
# refined content - confirm when implementing.
```
---
## 五、硬件推荐配置
| 配置等级 | GPU | 内存 | 推荐模型 | 生成质量 |
|---------|-----|------|---------|---------|
| **入门** | 无GPU | 32GB RAM | Qwen2.5-7B (CPU) | ⭐⭐⭐ |
| **推荐** | RTX 4090 24GB | 64GB RAM | Qwen2.5-32B-Q4 | ⭐⭐⭐⭐ |
| **高端** | 2×RTX 4090 | 128GB RAM | Qwen2.5-72B-Q4 | ⭐⭐⭐⭐⭐ |
| **旗舰** | 多卡 A100 80GB / H100 | 256GB RAM | DeepSeek-V2.5-236B | ⭐⭐⭐⭐⭐+ |
---
## 六、方案总结
```
┌─────────────────────────────────────────┐
│ 完整工作流程 │
│ │
│ 📚 文档 → 知识库(ChromaDB+BGE-M3) │
│ ↓ │
│ 🎯 用户输入主题和要求 │
│ ↓ │
│ 🔍 RAG检索相关知识 │
│ ↓ │
│ 📋 LLM生成大纲 (结构化JSON) │
│ ↓ │
│ 📝 LLM逐页生成详细内容 │
│ ↓ │
│ ✨ LLM质量优化和一致性检查 │
│ ↓ │
│ 🎨 python-pptx渲染为PPTX文件 │
│ ↓ │
│ 📊 高质量PPT输出 │
└─────────────────────────────────────────┘
```
**核心优势:**
- ✅ **完全离线**:所有组件本地运行,无需联网
- ✅ **知识驱动**:基于RAG确保内容专业、准确
- ✅ **高度可控**:中间JSON可人工编辑后再生成
- ✅ **模板化设计**:专业级视觉效果
- ✅ **可扩展**:易于添加新布局、新模板、新图表类型
claude sonnet
纯离线AI驱动PPT生成最佳解决方案
整体架构图
知识库文档 → 文档处理 → 向量数据库 → LLM推理 → PPT生成
↑ ↑ ↑ ↑ ↑
本地文件 Ollama/ ChromaDB 本地模型 python-pptx
PDF/Word LangChain Qwen/ 或Marp
第一层:核心组件选型
1. 本地LLM推理引擎
推荐方案:Ollama(最简单易用)
备选:
├── llama.cpp(性能最优)
├── LM Studio(有GUI)
└── vLLM(高吞吐,需要GPU)
2. 推荐模型
| 场景 | 模型 | 显存需求 |
|---|---|---|
| 中文PPT(最佳) | Qwen2.5:14b | 10GB |
| 平衡性能 | Qwen2.5:7b | 6GB |
| 低配机器 | Qwen2.5:3b | 3GB |
| 英文场景 | Llama3.1:8b | 6GB |
3. 知识库栈
文档解析:Unstructured(本地开源版)/ LlamaParse(注意:LlamaParse 为云端服务,需联网调用,纯离线环境不可用)
向量模型:bge-m3(中文最强,纯离线)
向量数据库:ChromaDB / Qdrant(本地模式)
编排框架:LangChain / LlamaIndex
第二层:完整技术栈安装
Step 1:安装Ollama + 模型
# Linux/Mac
curl -fsSL https://ollama.ai/install.sh | sh
# 拉取模型(离线后无需网络)
ollama pull qwen2.5:14b
ollama pull nomic-embed-text # 备用embedding
# 验证
ollama run qwen2.5:14b "你好"
Step 2:Python环境
conda create -n ppt-ai python=3.11
conda activate ppt-ai
# Package notes (kept out of the command: a comment after a trailing "\"
# breaks the line continuation, so the remaining packages would be run as
# separate commands instead of being installed):
#   pymupdf      - PDF parsing
#   python-docx  - Word parsing
#   docx2txt     - required by LangChain's Docx2txtLoader used in knowledge_base.py
#   unstructured - generic document parsing
#   gradio       - optional web UI
pip install \
    langchain \
    langchain-community \
    langchain-ollama \
    chromadb \
    sentence-transformers \
    python-pptx \
    pymupdf \
    python-docx \
    docx2txt \
    unstructured \
    gradio
Step 3:离线Embedding模型
# 提前下载到本地(关键!)
python -c "
from sentence_transformers import SentenceTransformer
model = SentenceTransformer('BAAI/bge-m3')
model.save('./models/bge-m3')
print('下载完成')
"
第三层:核心代码实现
模块1:知识库构建
# knowledge_base.py
import os
from pathlib import Path
from langchain_community.document_loaders import (
PyMuPDFLoader,
Docx2txtLoader,
TextLoader,
DirectoryLoader
)
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import Chroma
from langchain_community.embeddings import HuggingFaceEmbeddings
class KnowledgeBase:
    """Local knowledge base: load documents, embed them and query a Chroma store.

    The embedding model is loaded from a local directory so that no network
    access is needed at run time.
    """

    def __init__(self, kb_path="./knowledge_base", model_path="./models/bge-m3",
                 device="cuda"):
        """Load the embedding model; no vector store is attached yet.

        Args:
            kb_path: Stored on the instance as ``kb_path``.
            model_path: Local directory of the sentence-transformers model.
            device: Device passed to the embedding model. Defaults to
                ``"cuda"`` as before; pass ``"cpu"`` on machines without a GPU.
        """
        self.kb_path = kb_path
        self.embeddings = HuggingFaceEmbeddings(
            model_name=model_path,
            model_kwargs={"device": device},
            encode_kwargs={"normalize_embeddings": True},
        )
        self.vectorstore = None

    def load_documents(self, docs_dir: str):
        """Recursively load every PDF, DOCX, TXT and Markdown file under ``docs_dir``.

        Loading is best-effort: a file that fails to load is reported on
        stdout and skipped. Each loaded document gets a ``source_file``
        metadata entry holding the file name.

        Returns:
            A list of the loaded documents (possibly empty).
        """
        documents = []
        docs_path = Path(docs_dir)
        # Read text files as UTF-8 explicitly; the platform default encoding
        # (e.g. GBK on Chinese Windows) would otherwise garble or reject them.
        text_kwargs = {"encoding": "utf-8"}
        loaders_map = {
            "*.pdf": (PyMuPDFLoader, {}),
            "*.docx": (Docx2txtLoader, {}),
            "*.txt": (TextLoader, text_kwargs),
            "*.md": (TextLoader, text_kwargs),
        }
        for pattern, (loader_cls, loader_kwargs) in loaders_map.items():
            for file_path in docs_path.glob(f"**/{pattern}"):
                try:
                    docs = loader_cls(str(file_path), **loader_kwargs).load()
                except Exception as e:  # deliberate: one bad file must not abort the import
                    print(f"❌ 加载失败 {file_path.name}: {e}")
                    continue
                # Record where each document came from.
                for doc in docs:
                    doc.metadata["source_file"] = file_path.name
                documents.extend(docs)
                print(f"✅ 已加载: {file_path.name}")
        return documents

    def build_index(self, docs_dir: str, persist_dir="./chroma_db"):
        """Load, chunk and embed the documents, then store them in Chroma.

        Args:
            docs_dir: Directory scanned by :meth:`load_documents`.
            persist_dir: Directory where Chroma keeps the index.

        Raises:
            ValueError: If no document could be loaded from ``docs_dir``.
        """
        documents = self.load_documents(docs_dir)
        if not documents:
            # Fail with a clear message instead of an opaque vector-store error.
            raise ValueError(f"未在 {docs_dir} 中加载到任何文档,无法构建知识库")
        # Split on paragraph/line breaks first, then on sentence punctuation.
        # Both ASCII and full-width marks are listed because Chinese text
        # normally uses the full-width forms.
        splitter = RecursiveCharacterTextSplitter(
            chunk_size=512,
            chunk_overlap=64,
            separators=["\n\n", "\n", "。", "!", "!", "?", "?", ";", ";", " ", ""],
        )
        chunks = splitter.split_documents(documents)
        print(f"📚 共切分 {len(chunks)} 个文本块")
        self.vectorstore = Chroma.from_documents(
            documents=chunks,
            embedding=self.embeddings,
            persist_directory=persist_dir,
        )
        # Only call persist() where the store still offers it.
        if hasattr(self.vectorstore, "persist"):
            self.vectorstore.persist()
        print(f"✅ 知识库已构建并保存至 {persist_dir}")

    def load_existing(self, persist_dir="./chroma_db"):
        """Attach to an index previously written to ``persist_dir``."""
        self.vectorstore = Chroma(
            persist_directory=persist_dir,
            embedding_function=self.embeddings,
        )
        print(f"✅ 已加载现有知识库,共 {self.vectorstore._collection.count()} 条记录")

    def retrieve(self, query: str, top_k=5) -> list:
        """Return the ``top_k`` most similar chunks for ``query``.

        Returns:
            A list of ``(page_content, metadata, score)`` tuples in the order
            produced by the vector store.

        Raises:
            ValueError: If no index has been built or loaded yet.
        """
        if self.vectorstore is None:
            raise ValueError("请先构建或加载知识库")
        results = self.vectorstore.similarity_search_with_score(query, k=top_k)
        return [(doc.page_content, doc.metadata, score) for doc, score in results]
模块2:PPT结构生成器
# ppt_generator.py
import json
from langchain_ollama import OllamaLLM
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain
class PPTStructureGenerator:
    """Use a local Ollama model to produce a PPT outline and per-slide details as JSON."""

    def __init__(self, model="qwen2.5:14b"):
        """Create the Ollama client.

        Args:
            model: Ollama model tag to run.
        """
        self.llm = OllamaLLM(
            model=model,
            temperature=0.3,
            num_ctx=8192,
        )

    @staticmethod
    def _extract_json(text: str) -> dict:
        """Parse the span from the first ``{`` to the last ``}`` of ``text`` as JSON.

        Models often wrap the JSON in prose or Markdown fences; everything
        outside the outermost braces is ignored.

        Raises:
            json.JSONDecodeError: If there is no brace-delimited span or the
                span is not valid JSON.
        """
        start = text.find('{')
        end = text.rfind('}') + 1
        if start == -1 or end <= start:
            raise json.JSONDecodeError("No JSON object found in LLM output", text, 0)
        return json.loads(text[start:end])

    def generate_outline(self, topic: str, context: str, slides_count=12) -> dict:
        """Generate the PPT outline for ``topic`` from retrieved knowledge.

        Args:
            topic: Presentation topic.
            context: Knowledge-base text inserted into the prompt.
            slides_count: Approximate number of slides requested.

        Returns:
            The outline parsed from the model's JSON answer.

        Raises:
            json.JSONDecodeError: If the answer contains no parseable JSON
                object; the error and the raw output are printed first.
        """
        prompt = PromptTemplate(
            input_variables=["topic", "context", "slides_count"],
            template="""你是专业的PPT设计师和内容策划专家。
基于以下知识库内容,为主题"{topic}"生成一个专业的PPT大纲。
【知识库参考内容】
{context}
【要求】
- 生成约{slides_count}张幻灯片
- 结构清晰:封面→目录→内容→总结→结束页
- 每页有明确标题和3-5个关键要点
- 内容必须基于知识库,不要编造
- 要点要精炼,每条不超过20字
【输出格式】严格按JSON输出,不要有任何其他内容:
{{
"title": "PPT总标题",
"subtitle": "副标题",
"author": "作者/部门",
"slides": [
{{
"slide_number": 1,
"type": "cover",
"title": "标题",
"content": [],
"notes": "演讲备注"
}},
{{
"slide_number": 2,
"type": "agenda",
"title": "目录",
"content": ["章节1", "章节2", "章节3"],
"notes": ""
}},
{{
"slide_number": 3,
"type": "content",
"title": "页面标题",
"content": ["要点1", "要点2", "要点3"],
"highlight": "核心数据或金句(可选)",
"notes": "详细演讲稿"
}}
]
}}"""
        )
        chain = LLMChain(llm=self.llm, prompt=prompt)
        result = chain.run(
            topic=topic,
            context=context,
            slides_count=slides_count
        )
        try:
            return self._extract_json(result)
        except json.JSONDecodeError as e:
            print(f"JSON解析错误: {e}")
            print(f"原始输出: {result}")
            raise

    def enrich_slide_content(self, slide: dict, context: str) -> dict:
        """Ask the model for detailed points, a data highlight and speaker notes.

        Enrichment is best-effort: when the answer cannot be parsed the slide
        is returned unchanged and a warning is printed.

        Args:
            slide: Slide dict; its ``title`` and ``content`` feed the prompt.
                It is updated in place with the parsed keys.
            context: Reference text inserted into the prompt.

        Returns:
            The same ``slide`` dict.
        """
        prompt = PromptTemplate(
            input_variables=["slide_title", "points", "context"],
            template="""基于以下参考资料,为PPT页面"{slide_title}"生成详细内容。
参考资料:
{context}
当前要点:
{points}
请生成:
1. 更详细的要点说明(每条扩展为1-2句话)
2. 相关数据或案例(如果资料中有)
3. 演讲备注(200字左右的演讲稿)
JSON格式输出:
{{
"detailed_points": ["详细要点1", "详细要点2"],
"data_highlight": "关键数据(没有则为空)",
"speaker_notes": "演讲备注内容"
}}"""
        )
        chain = LLMChain(llm=self.llm, prompt=prompt)
        result = chain.run(
            slide_title=slide.get("title", ""),
            # Tolerate a missing/None list and non-string bullet items.
            points="\n".join(str(p) for p in (slide.get("content") or [])),
            context=context
        )
        try:
            enriched = self._extract_json(result)
        except json.JSONDecodeError as e:
            # Keep the original slide, but do not hide the failure.
            print(f"⚠️ 幻灯片内容丰富失败,保留原内容: {e}")
            return slide
        slide.update(enriched)
        return slide
模块3:PPT文件渲染器
# ppt_renderer.py
from pptx import Presentation
from pptx.util import Inches, Pt, Emu
from pptx.dml.color import RGBColor
from pptx.enum.text import PP_ALIGN
from pptx.util import Inches, Pt
import copy
class PPTRenderer:
"""高质量PPT渲染器"""
# Professional color palettes, keyed by theme name. Every palette defines
# the same six roles.
THEMES = {
"blue_tech": {
"primary": RGBColor(0x1A, 0x5F, 0xAB), # dark blue
"secondary": RGBColor(0x00, 0xB4, 0xD8), # bright blue
"accent": RGBColor(0xFF, 0x6B, 0x35), # orange
"background": RGBColor(0xF8, 0xF9, 0xFA), # light gray
"text_dark": RGBColor(0x1A, 0x1A, 0x2E), # dark text
"text_light": RGBColor(0xFF, 0xFF, 0xFF), # white text
},
"dark_pro": {
"primary": RGBColor(0x0D, 0x1B, 0x2A),
"secondary": RGBColor(0x00, 0xF5, 0xFF),
"accent": RGBColor(0xFF, 0xD6, 0x00),
"background": RGBColor(0x16, 0x21, 0x3E),
"text_dark": RGBColor(0xE0, 0xE0, 0xE0),
"text_light": RGBColor(0xFF, 0xFF, 0xFF),
},
"green_nature": {
"primary": RGBColor(0x2D, 0x6A, 0x4F),
"secondary": RGBColor(0x52, 0xB7, 0x88),
"accent": RGBColor(0xF7, 0xC5, 0x9F),
"background": RGBColor(0xF0, 0xF7, 0xF4),
"text_dark": RGBColor(0x1B, 0x40, 0x32),
"text_light": RGBColor(0xFF, 0xFF, 0xFF),
}
}
def __init__(self, theme="blue_tech"):
    """Start an empty presentation and pick the color palette.

    Args:
        theme: Key into ``THEMES``; an unknown key raises ``KeyError``.
    """
    presentation = Presentation()
    self.prs = presentation
    self.theme = self.THEMES[theme]
    # Widescreen (16:9) canvas: 13.33 in wide by 7.5 in high.
    presentation.slide_width = Inches(13.33)
    presentation.slide_height = Inches(7.5)
def _add_background(self, slide, color: RGBColor):
    """Paint the background of ``slide`` with the solid ``color``."""
    bg_fill = slide.background.fill
    bg_fill.solid()
    bg_fill.fore_color.rgb = color
def _add_rectangle(self, slide, left, top, width, height, color, transparency=0):
    """Add a solid-filled rectangle without an outline to ``slide``.

    Args:
        slide: Slide to draw on.
        left, top, width, height: Position and size in inches.
        color: ``RGBColor`` used for the fill.
        transparency: Fraction from 0 (opaque, default) to 1 (fully
            transparent); values above 1 are treated as 1.

    Returns:
        The created shape.
    """
    shape = slide.shapes.add_shape(
        1,  # auto-shape id 1 is MSO_SHAPE.RECTANGLE
        Inches(left), Inches(top),
        Inches(width), Inches(height)
    )
    shape.fill.solid()
    shape.fill.fore_color.rgb = color
    shape.line.fill.background()  # no visible outline
    if transparency > 0:
        # python-pptx exposes no public transparency setter, so append an
        # <a:alpha> child to the fill's sRGB colour element. The previous
        # code assigned theme_color = None here, which dropped the RGB colour
        # just set above and never applied any transparency.
        from pptx.oxml.xmlchemy import OxmlElement

        srgb_clr = shape.fill._xPr.solidFill.get_or_change_to_srgbClr()
        opacity = 1.0 - min(float(transparency), 1.0)
        alpha = OxmlElement("a:alpha")
        # DrawingML expresses alpha as opacity in 1/1000 of a percent.
        alpha.set("val", str(int(round(opacity * 100000))))
        srgb_clr.append(alpha)
    return shape
def _add_text(self, slide, text, left, top, width, height,
              font_size=24, color=None, bold=False,
              align=PP_ALIGN.LEFT, font_name="微软雅黑"):
    """Place a word-wrapped, single-run text box on ``slide``.

    Args:
        slide: Slide to draw on.
        text: String shown in the box.
        left, top, width, height: Position and size in inches.
        font_size: Font size in points.
        color: Optional font color; left untouched when falsy.
        bold: Whether the run is bold.
        align: Paragraph alignment.
        font_name: Typeface name.

    Returns:
        The created text box shape.
    """
    box = slide.shapes.add_textbox(
        Inches(left), Inches(top), Inches(width), Inches(height)
    )
    frame = box.text_frame
    frame.word_wrap = True

    # A new text frame already has one empty paragraph; reuse it.
    paragraph = frame.paragraphs[0]
    paragraph.alignment = align

    run = paragraph.add_run()
    run.text = text
    run_font = run.font
    run_font.name = font_name
    run_font.size = Pt(font_size)
    run_font.bold = bold
    if color:
        run_font.color.rgb = color
    return box
def render_cover(self, slide_data: dict):
    """Append the cover slide: colored background, three bars and three text lines.

    Args:
        slide_data: Dict whose ``title``, ``subtitle`` and ``author`` entries
            are shown; missing entries render as empty text.

    Returns:
        The newly added slide.
    """
    blank_layout = self.prs.slide_layouts[6]  # blank layout
    slide = self.prs.slides.add_slide(blank_layout)
    palette = self.theme

    self._add_background(slide, palette["primary"])

    # Decorative bars: top edge, bottom edge, then the vertical bar on the left.
    bars = (
        (0, 0, 13.33, 0.15, "secondary"),
        (0, 7.35, 13.33, 0.15, "accent"),
        (0.5, 1.5, 0.08, 4, "secondary"),
    )
    for bar_left, bar_top, bar_width, bar_height, role in bars:
        self._add_rectangle(
            slide, bar_left, bar_top, bar_width, bar_height, palette[role]
        )

    # Main title
    self._add_text(
        slide, slide_data.get("title", ""),
        1.0, 2.0, 10, 1.5,
        font_size=48, color=palette["text_light"],
        bold=True, align=PP_ALIGN.LEFT,
    )
    # Subtitle
    self._add_text(
        slide, slide_data.get("subtitle", ""),
        1.0, 3.8, 9, 0.8,
        font_size=24, color=palette["secondary"],
        bold=False, align=PP_ALIGN.LEFT,
    )
    # Author line in a fixed light gray
    self._add_text(
        slide, slide_data.get("author", ""),
        1.0, 5.5, 5, 0.5,
        font_size=16, color=RGBColor(0xCC, 0xCC, 0xCC),
        align=PP_ALIGN.LEFT,
    )
    return slide
def render_content_slide(self, slide_data: dict, slide_num: int, total: int):
    """Append a content slide: title bar, page number, numbered points, optional highlight.

    Args:
        slide_data: Slide dict. ``title`` and ``content`` are rendered;
            ``highlight`` (or ``data_highlight``) is shown in a bar near the
            bottom; ``notes`` (or ``speaker_notes``) become speaker notes.
        slide_num: Page number shown in the title bar.
        total: Total page count shown next to it.

    Returns:
        The newly added slide.
    """
    slide = self.prs.slides.add_slide(self.prs.slide_layouts[6])  # blank layout
    t = self.theme
    # Light background
    self._add_background(slide, t["background"])
    # Title bar and the thin accent line under it
    self._add_rectangle(slide, 0, 0, 13.33, 1.4, t["primary"])
    self._add_rectangle(slide, 0, 1.4, 13.33, 0.06, t["secondary"])
    # Title text
    self._add_text(
        slide, slide_data.get("title", ""),
        0.5, 0.2, 11, 1.0,
        font_size=32, color=t["text_light"],
        bold=True, align=PP_ALIGN.LEFT
    )
    # Page number
    self._add_text(
        slide, f"{slide_num} / {total}",
        11.5, 0.4, 1.5, 0.6,
        font_size=14, color=t["secondary"],
        align=PP_ALIGN.RIGHT
    )

    highlight = slide_data.get("highlight") or slide_data.get("data_highlight")
    content_items = slide_data.get("content") or []

    # One shared row pitch for badge and text keeps them aligned on every
    # row. The pitch shrinks for long lists so the rows stay above the
    # highlight bar (or the slide edge when there is no highlight).
    y_start = 1.7
    bottom = 6.4 if highlight else 7.3
    row_step = 0.9
    if content_items:
        row_step = min(row_step, (bottom - y_start) / len(content_items))

    for i, item in enumerate(content_items):
        y = y_start + i * row_step
        # Number badge (a small square drawn with _add_rectangle)
        self._add_rectangle(slide, 0.5, y, 0.35, 0.35, t["secondary"])
        self._add_text(
            slide, str(i + 1),
            0.52, y, 0.3, 0.35,
            font_size=12, color=t["text_light"],
            bold=True, align=PP_ALIGN.CENTER
        )
        # Point text; str() guards against non-string items in model output
        self._add_text(
            slide, str(item),
            1.1, y, 11.5, 0.7,
            font_size=20, color=t["text_dark"],
            align=PP_ALIGN.LEFT
        )

    # Highlight bar (if any)
    if highlight:
        self._add_rectangle(slide, 0.5, 6.5, 12.3, 0.7, t["accent"])
        self._add_text(
            slide, f"💡 {highlight}",
            0.7, 6.55, 12, 0.6,
            font_size=16, color=t["text_light"],
            bold=True, align=PP_ALIGN.LEFT
        )

    # Speaker notes
    notes = slide_data.get("notes") or slide_data.get("speaker_notes", "")
    if notes:
        slide.notes_slide.notes_text_frame.text = notes
    return slide
def render_agenda(self, slide_data: dict):
"""渲染目录页"""
slide_layout = self.prs.slide_layouts[6]
slide = self.prs.slides.add_slide(slide_layout)
t = self.theme
self._add_background(slide, t["background"])
self._add_rectangle(slide, 0, 0, 13.33, 1.4, t["primary"])
self._add_rectangle(slide, 0, 1.4, 13.33, 0.06, t["secondary"])
self._add_text(
slide, "目 录",
0.5, 0.2, 11, 1.0,
font_size=36, color=t["text_light"],
bold=True, align=PP_ALIGN.LEFT
)
items = slide_data.get("content", [])
cols = 2 if len(items) > 4 else 1
for i, item in enumerate(items):
if cols == 2:
col = i % 2
row = i // 2
x = 0.8 + col * 6.2
y = 2.0 + row * 1.3
w = 5.8
else:
x, y, w = 1.5, 2.0 + i * 1.0, 10.0
# 序号