目录提取与设置

1. 提取目录

from PyPDF2 import PdfReader
from PyPDF2.errors import PdfReadError

def extract_pdf_catalog(pdf_path, save_txt=True, txt_path="PDF目录.txt"):
    """
    Extract the outline (bookmarks) of a PDF as indented text lines.

    Every bookmark becomes one line of the form
    ``<indent>▸ <title> (第<page>页)``. The indent is four spaces per nesting
    level and ``<page>`` is the 1-based page number, or ``未知`` when the
    bookmark's target page cannot be resolved.

    :param pdf_path: path of the PDF file to read
    :param save_txt: when true, also write the lines to ``txt_path``
    :param txt_path: text file written when ``save_txt`` is true; defaults
        to ``PDF目录.txt`` in the current working directory
    :return: list of outline lines; when the PDF has no outline, or on any
        failure, a single-element list holding a human-readable message
    """
    try:
        reader = PdfReader(pdf_path)
        outlines = reader.outline

        if not outlines:
            return ["⚠️ 该PDF没有目录/书签"]

        catalog = []

        def resolve_page(entry):
            """Return the 1-based page number of a bookmark, or "未知"."""
            try:
                index = reader.get_destination_page_number(entry)
                # Depending on the library version an unresolved target is
                # reported as -1 or None; neither is a real page.
                if index is None or index < 0:
                    return "未知"
            except Exception:
                # Best effort: one broken bookmark must not abort the
                # whole extraction.
                return "未知"
            # The library counts pages from 0; readers count from 1.
            return index + 1

        def traverse(items, level=0):
            for entry in items:
                if isinstance(entry, list):
                    # A nested list holds the children of the preceding item.
                    traverse(entry, level + 1)
                else:
                    indent = "    " * level
                    page = resolve_page(entry)
                    catalog.append(f"{indent}▸ {entry.title} (第{page}页)")

        traverse(outlines)

        # Optionally persist the outline next to returning it.
        if save_txt:
            with open(txt_path, "w", encoding="utf-8") as f:
                f.write(f"【{pdf_path} 目录】\n\n")
                f.write("\n".join(catalog))
            print(f"✅ 目录已保存到:{txt_path}")

        return catalog

    except FileNotFoundError:
        return ["❌ 找不到PDF文件"]
    except PdfReadError:
        return ["❌ 该文件不是有效的PDF"]
    except Exception as e:
        # Top-level boundary of the script: report instead of crashing.
        return [f"❌ 错误:{str(e)}"]

# ========== Run ==========
if __name__ == "__main__":
    # Replace this with the path of your own PDF file.
    pdf_path = "从这里学NVH——噪声、振动、模态分析的入门与进阶.pdf"
    catalog_lines = extract_pdf_catalog(pdf_path)

    # Echo whatever came back: outline lines or a single status message.
    print("\n===== 提取结果 =====")
    for catalog_line in catalog_lines:
        print(catalog_line)

2. 设置目录

from PyPDF2 import PdfReader, PdfWriter

def add_bookmarks_to_pdf(input_pdf: str, output_pdf: str, catalog_data=None):
    """
    Copy a PDF and add a nested outline (bookmarks) to the copy.

    :param input_pdf: path of the source PDF
    :param output_pdf: path of the PDF to write, with bookmarks added
    :param catalog_data: optional iterable of ``(title, page_num, level)``
        tuples in document order. ``page_num`` is the 1-based page the
        bookmark points to; ``level`` is 0 for a top-level entry, 1 for its
        children, and so on. When omitted, the catalog built into this
        function is used.
    :raises ValueError: if a ``page_num`` lies outside ``1..page count``;
        raised before the output file is written
    """
    reader = PdfReader(input_pdf)
    writer = PdfWriter()

    # Copy every page into the writer.
    for page in reader.pages:
        writer.add_page(page)

    if catalog_data is None:
        # Built-in catalog used when the caller supplies none.
        catalog_data = [
            ("第1章 工程噪声基础", 18, 0),
            ("1.1  什么是声波", 19, 1),

            ("5.15.6  二者的关联性", 329, 2),
            ("后记", 331, 0),
            ("参考文献", 336, 0),
            ("后折页", 358, 0),
        ]

    page_count = len(reader.pages)

    # Most recent bookmark seen at each level, used to nest deeper entries.
    ancestors = {}

    for title, page_num, level in catalog_data:
        if not 1 <= page_num <= page_count:
            raise ValueError(
                f"bookmark {title!r} points to page {page_num}, "
                f"but the PDF has {page_count} page(s)"
            )

        # Attach to the nearest shallower bookmark. An entry with no
        # ancestor at all (level 0, or a skipped level at the very start)
        # becomes a top-level bookmark instead of failing.
        parent = None
        for shallower in range(level - 1, -1, -1):
            if shallower in ancestors:
                parent = ancestors[shallower]
                break

        # The writer indexes pages from 0, the catalog from 1.
        item = writer.add_outline_item(title, page_num - 1, parent=parent)

        # Bookmarks deeper than this one belong to an earlier branch and
        # must not be picked up as parents by later entries.
        for deeper in [known for known in ancestors if known > level]:
            del ancestors[deeper]
        ancestors[level] = item

    # Write the new PDF.
    with open(output_pdf, "wb") as f:
        writer.write(f)

    print(f"✅ 目录添加完成!\n📄 输出文件:{output_pdf}")

# ===================== Edit your PDF paths here =====================
if __name__ == "__main__":
    # Source PDF to read.
    INPUT_PDF = "从这里学NVH——噪声、振动、模态分析的入门与进阶 - 副本.pdf"
    # PDF to generate, with bookmarks added.
    OUTPUT_PDF = "带目录_新书.pdf"
    add_bookmarks_to_pdf(input_pdf=INPUT_PDF, output_pdf=OUTPUT_PDF)
posted @ 2026-03-19 09:12  redufa  阅读(4)  评论(0)    收藏  举报