Python 合并多个 PDF 并添加页码

import io
import os
import tempfile

from PyPDF2 import PdfReader, PdfWriter
from reportlab.pdfgen import canvas
from reportlab.lib.pagesizes import A4
from reportlab.pdfbase import pdfmetrics
from reportlab.pdfbase.ttfonts import TTFont

# Register the SimSun TrueType font under the name "SimSun" so that the
# canvas can select it with setFont("SimSun", ...). The font file is loaded
# from a fixed absolute path at import time.
pdfmetrics.registerFont(TTFont("SimSun", "/usr/local/share/fonts/simsun.ttc"))

# Number printed on the first stamped page. add_page_number() reads this
# module-level counter and increments it once per page, so numbering
# continues across successive calls.
count = 3


def add_page_number(input_pdf_path, output_pdf_path):
    """Stamp a running page number onto every page of a PDF.

    The number for the first page is taken from the module-level ``count``;
    ``count`` is incremented once per stamped page, so consecutive calls keep
    numbering where the previous call stopped.

    Args:
        input_pdf_path: Path of the PDF to read.
        output_pdf_path: Path the numbered copy is written to.
    """
    global count

    # Parse the input once and reuse it both for sizing the overlay pages
    # and for the merge below.
    existing_pdf = PdfReader(input_pdf_path)

    # Build an in-memory overlay PDF holding one page-number page per
    # input page.
    packet = io.BytesIO()
    can = canvas.Canvas(packet, pagesize=A4)
    for page in existing_pdf.pages:
        # Size the overlay page from the real page so the number is placed
        # correctly on non-A4 documents as well.
        box = page.mediabox
        page_width = float(box.width)
        page_height = float(box.height)
        can.setPageSize((page_width, page_height))

        # The font has to be selected again for every overlay page.
        can.setFont("SimSun", 12)
        # Centre the text horizontally, 20 pt above the bottom edge. The
        # media box origin is added because it is not guaranteed to be (0, 0).
        can.drawCentredString(
            float(box.left) + page_width / 2,
            float(box.bottom) + 20,
            str(count),
        )
        count += 1
        can.showPage()
    can.save()

    # Rewind the buffer so the overlay can be read back as a PDF.
    packet.seek(0)
    new_pdf = PdfReader(packet)

    # Merge each overlay page onto the matching original page.
    output = PdfWriter()
    for page, number_page in zip(existing_pdf.pages, new_pdf.pages):
        page.merge_page(number_page)
        output.add_page(page)

    with open(output_pdf_path, "wb") as output_stream:
        output.write(output_stream)


def merge_pdfs_with_page_numbers(pdf_list, output_path):
    """Number the pages of several PDFs and merge them into one file.

    Each input is passed through ``add_page_number`` in list order, so the
    page numbers run continuously across the merged document.

    Args:
        pdf_list: Paths of the PDFs to merge, in the desired order.
        output_path: Path the merged PDF is written to.
    """
    # Keep the intermediate files in a private temporary directory: this
    # cannot overwrite (and later delete) a user's own "temp_N.pdf" in the
    # working directory, and the directory is removed even if a step raises.
    with tempfile.TemporaryDirectory() as temp_dir:
        temp_files = []
        for i, pdf in enumerate(pdf_list):
            temp_file = os.path.join(temp_dir, f"temp_{i}.pdf")
            add_page_number(pdf, temp_file)
            temp_files.append(temp_file)

        # Append every numbered page to a single writer.
        merger = PdfWriter()
        for temp_file in temp_files:
            for page in PdfReader(temp_file).pages:
                merger.add_page(page)

        # Write before leaving the ``with`` block so the intermediate files
        # still exist while the merged output is produced.
        with open(output_path, "wb") as output_stream:
            merger.write(output_stream)


if __name__ == "__main__":
    # Destination of the merged, page-numbered document.
    output_pdf = "merged.pdf"
    # Input PDFs, processed in the order listed; edit this list as needed.
    pdf_files = ["1.pdf", "2.pdf"]
    merge_pdfs_with_page_numbers(pdf_list=pdf_files, output_path=output_pdf)

posted @ 2024-12-16 08:46  卓能文  阅读(126)  评论(0)    收藏  举报