pdf转word(以图片形式)


import fitz # PyMuPDF
from docx import Document
from docx.shared import Inches
import io


def pdf_to_word_screenshots(pdf_path, zoom=10):
    """Convert a PDF into a Word document holding one rendered image per page.

    Every page is rasterized with PyMuPDF, encoded as PNG in memory and
    inserted into the Word document at a width of 6 inches. The document is
    saved to ``pdf_path + ".docx"`` and that path is printed.

    Args:
        pdf_path: Path of the input PDF file.
        zoom: Scale factor applied to both axes when rendering a page
            (default 10). Higher values raise the rendered resolution.
    """
    doc = Document()
    # The render matrix depends only on zoom, so build it once.
    mat = fitz.Matrix(zoom, zoom)

    # The context manager closes the PDF even if rendering a page fails.
    with fitz.open(pdf_path) as pdf_doc:
        for page_num in range(len(pdf_doc)):
            # Separate pages with a break, but do not leave a trailing break
            # (and therefore a blank page) after the last picture.
            if page_num > 0:
                doc.add_page_break()

            page = pdf_doc.load_page(page_num)
            pix = page.get_pixmap(matrix=mat)
            img_bytes = pix.tobytes("png")

            # python-docx accepts a file-like object, so no temp file is needed.
            doc.add_picture(io.BytesIO(img_bytes), width=Inches(6))

    word_path = pdf_path + ".docx"
    doc.save(word_path)
    print(f"Word文档已保存到: {word_path}")

# Requires PyMuPDF: pip install pymupdf
pdf_to_word_screenshots(pdf_path=r"C:\Users\12997\Desktop\xxxPDF.pdf")

############################################################################ The following saves each page of a PDF as a PNG image
import fitz  # PyMuPDF
import os

def pdf_to_png(pdf_path, output_folder, zoom=2):
    """Save every page of a PDF as a PNG image.

    Pages are written to ``output_folder`` as ``page_1.png``, ``page_2.png``,
    and so on (1-based), and each saved path is printed.

    Args:
        pdf_path: Path of the input PDF file.
        output_folder: Directory that receives the PNG files. It is created
            if it does not exist yet.
        zoom: Scale factor applied to both axes when rendering a page
            (default 2). Higher values raise the rendered resolution.
    """
    # Create the output folder if needed; an empty string means the current
    # directory, which os.makedirs would reject.
    if output_folder:
        os.makedirs(output_folder, exist_ok=True)

    # The render matrix depends only on zoom, so build it once.
    mat = fitz.Matrix(zoom, zoom)

    # The context manager closes the PDF even if rendering or saving fails.
    with fitz.open(pdf_path) as pdf_doc:
        for page_num in range(len(pdf_doc)):
            page = pdf_doc.load_page(page_num)
            pix = page.get_pixmap(matrix=mat)

            # File names are 1-based to match human page numbering.
            output_path = os.path.join(output_folder, f"page_{page_num + 1}.png")

            pix.save(output_path)
            print(f"已保存: {output_path}")


# Usage example
pdf_path = r"C:\Users\xxx\Desktop\PDF.pdf"
output_folder = r"C:\Users\xxx\Desktop"
pdf_to_png(pdf_path=pdf_path, output_folder=output_folder, zoom=4)




posted @ 2025-06-12 15:55  默*为  阅读(25)  评论(0)    收藏  举报