使用python切割pdf文件

应用场景:需要使用 Python 按指定的页码范围切割 PDF 文件(页码从 0 开始,起始页和结束页都包含在结果中)。

首先使用pip安装以下包
pip install PyPDF2
pip install flask
实现代码如下:

import sys
from PyPDF2 import PdfReader, PdfWriter


def split_pdf(input_path, output_path, start_page, end_page):
    """Copy an inclusive, zero-based page range of a PDF into a new file.

    Args:
        input_path: Path of the PDF file to read.
        output_path: Path the extracted pages are written to.
        start_page: Zero-based index of the first page to copy. Negative
            values are clamped to 0.
        end_page: Zero-based index of the last page to copy (inclusive).
            Values past the last page are clamped to the last page.

    Raises:
        ValueError: If the requested range selects no pages, e.g. when
            start_page is greater than end_page, end_page is negative, or
            start_page lies past the end of the document.
    """
    # A reversed range can never select a page, whatever the document length,
    # so reject it before touching the filesystem.
    if start_page > end_page:
        raise ValueError(
            f"start_page ({start_page}) must not be greater than "
            f"end_page ({end_page})"
        )

    with open(input_path, 'rb') as input_file:
        pdf = PdfReader(input_file)
        total_pages = len(pdf.pages)

        # Clamp the requested range to the pages that actually exist.
        first_page = max(start_page, 0)
        last_page = min(end_page, total_pages - 1)

        # Fail loudly instead of silently writing a PDF with no pages; the
        # check runs before the output file is opened so nothing is created.
        if first_page > last_page:
            raise ValueError(
                f"page range {start_page}-{end_page} selects no pages "
                f"(document has {total_pages} pages)"
            )

        # Build a new PDF writer and copy the selected pages into it.
        output_pdf = PdfWriter()
        for page_num in range(first_page, last_page + 1):
            output_pdf.add_page(pdf.pages[page_num])

        # Write while the input file is still open: the writer is fed page
        # objects that belong to the reader backed by that file handle.
        with open(output_path, 'wb') as output_file:
            output_pdf.write(output_file)


# Read the input path, output path and page range from the command line.
# Guarded so that importing this module does not parse argv or split a file.
if __name__ == '__main__':
    if len(sys.argv) < 5:
        # Print a usage hint instead of failing with a bare IndexError.
        sys.exit(
            f"Usage: {sys.argv[0]} INPUT_PDF OUTPUT_PDF START_PAGE END_PAGE"
        )

    inputPath = sys.argv[1]
    outputPath = sys.argv[2]
    try:
        startPage = int(sys.argv[3])
        endPage = int(sys.argv[4])
    except ValueError:
        sys.exit("START_PAGE and END_PAGE must be integers "
                 "(zero-based, inclusive)")

    split_pdf(inputPath, outputPath, startPage, endPage)

也可以封装成一个flask接口:

import logging
from flask import Flask, request, jsonify
from PyPDF2 import PdfReader, PdfWriter

#pip install PyPDF2
#pip install flask
#pip install waitress

# Flask application object; the route below is registered on it.
app = Flask(__name__)

# Configure the root logger: records of level INFO and above go to app.log.
logging.basicConfig(filename='app.log', level=logging.INFO)

@app.route('/api/split_pdf', methods=['POST'])
def split_pdf():
    """Split a PDF on disk according to the POSTed form parameters.

    Form fields:
        input_path: Path of the PDF file to read.
        output_path: Path the extracted pages are written to.
        start_page: Zero-based index of the first page to copy; negative
            values are clamped to 0.
        end_page: Zero-based index of the last page to copy (inclusive);
            values past the last page are clamped to the last page.

    Returns:
        A JSON object with 'status' ('success' or 'error') and 'message',
        plus 'output_path' on success. Missing or non-integer parameters
        produce the error object with HTTP status 400; failures while
        processing the file produce the error object with the default
        status code.
    """
    # Log the request parameters.
    logging.info(f"Request Parameters: {request.form}")

    # NOTE(security): both paths come straight from the request and are used
    # as-is, so any client can read or overwrite any file this process can
    # access. Restrict them to a dedicated directory before exposing this.
    input_path = request.form.get('input_path')
    output_path = request.form.get('output_path')

    # Parse the page numbers defensively: a missing field yields '' and a
    # ValueError here instead of an unhandled TypeError from int(None).
    try:
        start_page = int(request.form.get('start_page', ''))
        end_page = int(request.form.get('end_page', ''))
    except ValueError:
        start_page = end_page = None

    if not input_path or not output_path or start_page is None:
        message = ('input_path, output_path, start_page and end_page are '
                   'required; the page numbers must be integers')
        logging.error(f"Split PDF error: {message}")
        return jsonify({'status': 'error', 'message': message}), 400

    try:
        with open(input_path, 'rb') as input_file:
            pdf = PdfReader(input_file)
            total_pages = len(pdf.pages)

            # Clamp the requested range to the pages that actually exist.
            first_page = max(start_page, 0)
            last_page = min(end_page, total_pages - 1)

            # Report an empty selection as an error rather than writing a
            # PDF with no pages and claiming success.
            if first_page > last_page:
                raise ValueError(
                    f"page range {start_page}-{end_page} selects no pages "
                    f"(document has {total_pages} pages)"
                )

            # Build a new PDF writer and copy the selected pages into it.
            output_pdf = PdfWriter()
            for page_num in range(first_page, last_page + 1):
                output_pdf.add_page(pdf.pages[page_num])

            # Save the extracted pages while the input file is still open.
            with open(output_path, 'wb') as output_file:
                output_pdf.write(output_file)

        logging.info(f"Split PDF: {input_path} -> {output_path}")
        # Success response.
        response = {
            'status': 'success',
            'message': 'PDF 文件切割成功。',
            'output_path': output_path
        }
    except Exception as e:
        # Top-level request boundary: report any failure to the client as a
        # JSON error object and record it in the log.
        response = {
            'status': 'error',
            'message': str(e)
        }
        logging.error(f"Split PDF error: {str(e)}")

    return jsonify(response)


if __name__ == '__main__':
    # The Werkzeug debugger lets anyone who can reach the server execute
    # arbitrary Python code, so it is only enabled when explicitly requested
    # with FLASK_DEBUG=1 instead of being switched on unconditionally.
    import os

    app.run(debug=os.environ.get('FLASK_DEBUG') == '1')
posted @ 2023-07-26 17:51  八戒vs  阅读(319)  评论(0)  编辑  收藏  举报