1. 提取目录
from PyPDF2 import PdfReader
from PyPDF2.errors import PdfReadError
def extract_pdf_catalog(pdf_path, save_txt=True, txt_path="PDF目录.txt"):
    """Extract the outline (bookmarks) of a PDF as indented text lines.

    Args:
        pdf_path: Path of the PDF file to read.
        save_txt: When true, also write the extracted lines to ``txt_path``.
        txt_path: Where the text dump is written. Defaults to the file name
            this function has always used.

    Returns:
        A list of strings. On success there is one line per bookmark,
        indented by nesting depth and annotated with its 1-based page
        number ("未知" when the page cannot be resolved). If the PDF has no
        outline, or reading fails, the list holds a single user-facing
        message instead.
    """
    try:
        reader = PdfReader(pdf_path)
        outlines = reader.outline
        if not outlines:
            return ["⚠️ 该PDF没有目录/书签"]

        catalog = []

        def traverse(items, level=0):
            for entry in items:
                # A nested list holds the children of the preceding entry.
                if isinstance(entry, list):
                    traverse(entry, level + 1)
                    continue

                title = entry.title
                # Outline entries are destinations, not page objects, so they
                # are resolved with get_destination_page_number().
                try:
                    index = reader.get_destination_page_number(entry)
                except Exception:
                    # Best effort: a broken destination must not abort the
                    # whole listing.
                    index = None

                # The reader reports a zero-based index (negative when the
                # page is not found); readers expect a 1-based page number.
                if isinstance(index, int) and index >= 0:
                    page = index + 1
                else:
                    page = "未知"

                indent = " " * level
                catalog.append(f"{indent}▸ {title} (第{page}页)")

        traverse(outlines)

        if save_txt:
            with open(txt_path, "w", encoding="utf-8") as f:
                f.write(f"【{pdf_path} 目录】\n\n")
                f.write("\n".join(catalog))
            print(f"✅ 目录已保存到:{txt_path}")

        return catalog
    except FileNotFoundError:
        return ["❌ 找不到PDF文件"]
    except PdfReadError:
        return ["❌ 该文件不是有效的PDF"]
    except Exception as e:
        # Top-level boundary of the script: report instead of crashing.
        return [f"❌ 错误:{str(e)}"]
# ========== Run ==========
if __name__ == "__main__":
    # Point this at the PDF whose outline should be extracted.
    source_pdf = "从这里学NVH——噪声、振动、模态分析的入门与进阶.pdf"
    outline_lines = extract_pdf_catalog(source_pdf)
    # Header first, then one extracted line per output line.
    print("\n===== 提取结果 =====", *outline_lines, sep="\n")
2. 设置目录
from PyPDF2 import PdfReader, PdfWriter
def add_bookmarks_to_pdf(input_pdf: str, output_pdf: str, catalog_data=None):
    """Copy a PDF and attach a nested outline (bookmarks) to the copy.

    Args:
        input_pdf: Path of the source PDF.
        output_pdf: Path the bookmarked copy is written to.
        catalog_data: Optional iterable of ``(title, page_num, level)``
            tuples, where ``page_num`` is the 1-based page number and
            ``level`` is the nesting depth (0 = top level). When omitted,
            the built-in catalog below is used.

    Raises:
        ValueError: If an entry has a negative level or a page number
            outside the document. Raised before anything is written.
    """
    reader = PdfReader(input_pdf)
    writer = PdfWriter()

    # Copy every page into the output document.
    for page in reader.pages:
        writer.add_page(page)

    if catalog_data is None:
        # Built-in catalog: (title, 1-based page number, nesting level).
        catalog_data = [
            ("第1章 工程噪声基础", 18, 0),
            ("1.1 什么是声波", 19, 1),
            ("5.15.6 二者的关联性", 329, 2),
            ("后记", 331, 0),
            ("参考文献", 336, 0),
            ("后折页", 358, 0),
        ]

    entries = list(catalog_data)
    page_count = len(reader.pages)

    # Validate up front so a bad entry cannot leave a half-built outline or
    # silently wrap around (page 0 would otherwise become index -1).
    for title, page_num, level in entries:
        if level < 0:
            raise ValueError(f"bookmark {title!r} has negative level {level}")
        if not 1 <= page_num <= page_count:
            raise ValueError(
                f"bookmark {title!r} points to page {page_num}, "
                f"but the document has {page_count} page(s)"
            )

    # Most recent bookmark seen at each depth on the current branch.
    ancestors = {}
    for title, page_num, level in entries:
        # Bookmarks at this depth or deeper belong to an earlier branch and
        # must not be reused as parents.
        for stale in [depth for depth in ancestors if depth >= level]:
            del ancestors[stale]

        # Attach to the nearest shallower bookmark. This is also the fallback
        # when a level is skipped; with none available the item is top level.
        parent = ancestors[max(ancestors)] if ancestors else None

        # add_outline_item takes a zero-based page index.
        ancestors[level] = writer.add_outline_item(
            title, page_num - 1, parent=parent
        )

    # Write the bookmarked copy.
    with open(output_pdf, "wb") as f:
        writer.write(f)
    print(f"✅ 目录添加完成!\n📄 输出文件:{output_pdf}")
# ===================== Edit your PDF paths here =====================
if __name__ == "__main__":
    # Source PDF to read pages from.
    INPUT_PDF = "从这里学NVH——噪声、振动、模态分析的入门与进阶 - 副本.pdf"
    # Destination of the bookmarked copy.
    OUTPUT_PDF = "带目录_新书.pdf"
    add_bookmarks_to_pdf(input_pdf=INPUT_PDF, output_pdf=OUTPUT_PDF)