如何使用 Python 进行 PDF 文件的分割与合并
1.安装 PyPDF2 包
先在命令行执行 pip install PyPDF2 完成安装,然后在代码中 import PyPDF2。
2.在 PyPDF2 库中,可以使用以下代码打开 PDF 文件:
# Open the PDF in binary mode. The context manager guarantees the file
# handle is closed; the original snippet left it open.
with open('filename.pdf', 'rb') as pdf_file:
    # PdfReader and len(reader.pages) replace PdfFileReader and numPages,
    # which were removed in PyPDF2 3.0.
    pdf_reader = PyPDF2.PdfReader(pdf_file)
    # Any further reads through pdf_reader must happen inside this block,
    # while the underlying file is still open.
    total_pages = len(pdf_reader.pages)
3.下面的代码将 PDF 的每一页拆分为一个单独的文件
from PyPDF2 import PdfReader, PdfWriter

pdf_path = r"F:\工作\1.pdf"
save_path = r"F:\工作\a\a"

# Split the PDF: every page is written to its own single-page file.
# PdfReader / PdfWriter / add_page replace the PdfFileReader /
# PdfFileWriter / addPage names that were removed in PyPDF2 3.0.
pdf_reader = PdfReader(pdf_path)
for i, page in enumerate(pdf_reader.pages):
    # A fresh writer per page so each output file holds exactly one page.
    pdf_writer = PdfWriter()
    pdf_writer.add_page(page)
    # Output name is save_path followed by the zero-based page index.
    with open(save_path + '{}.pdf'.format(str(i)), 'wb') as fh:
        pdf_writer.write(fh)
    print('{} Save Sucessfully !\n'.format(str(i)))
4. 将 2 个 PDF 文件合并为 1 个
from PyPDF2 import PdfReader, PdfWriter

merge_pdf = r"F:\工作\z.pdf"
p1_pdf = r"F:\工作\a\a0.pdf"
p2_pdf = r"F:\工作\a\a1.pdf"

# PdfReader / PdfWriter / add_page replace the PdfFileReader /
# PdfFileWriter / addPage names that were removed in PyPDF2 3.0.
p1_reader = PdfReader(p1_pdf)
p2_reader = PdfReader(p2_pdf)
merge = PdfWriter()

# Append every page of p1, then every page of p2, keeping page order.
for reader in (p1_reader, p2_reader):
    for page in reader.pages:
        merge.add_page(page)

# Write the combined document out.
with open(merge_pdf, 'wb') as f:
    merge.write(f)
5.将多个单页 PDF 合并成一个文件
from PyPDF2 import PdfReader, PdfWriter

merge_pdf = r"F:\工作\z.pdf"
p_pdf = r"F:\工作\a\a"

# PdfReader / PdfWriter / add_page replace the PdfFileReader /
# PdfFileWriter / addPage names that were removed in PyPDF2 3.0.
merge = PdfWriter()

# Input files are named p_pdf + <number> + '.pdf'; numbers 220..226 are merged
# in ascending order (the range end 227 is exclusive).
for file_no in range(220, 227):
    p_reader = PdfReader(p_pdf + str(file_no) + '.pdf')
    # Iterate pages directly; the original reused `i` for both loops,
    # shadowing the outer file counter.
    for page in p_reader.pages:
        merge.add_page(page)

# Write the combined document out.
with open(merge_pdf, 'wb') as f:
    merge.write(f)
6. 直接截取 PDF 中的某几页
from PyPDF2 import PdfReader, PdfWriter

# Offset added to a printed (paper-book) page number to obtain the
# corresponding page index in the PDF.
# NOTE(review): reader.pages is zero-indexed; confirm the offset was chosen
# with that in mind for your particular book.
offset = 11
# Printed page range to extract.
page_start = 10
page_end = 15

all_pdf = r"F:\工作\a.pdf"
part_pdf = r"F:\工作\p.pdf"

# PdfReader / PdfWriter / add_page / reader.pages[i] replace the
# PdfFileReader / PdfFileWriter / addPage / getPage names that were removed
# in PyPDF2 3.0.
p_reader = PdfReader(all_pdf)
p_writer = PdfWriter()

# The "+ 1" makes the range inclusive of page_end.
for i in range(page_start + offset, page_end + offset + 1):
    p_writer.add_page(p_reader.pages[i])

with open(part_pdf, 'wb') as f:
    p_writer.write(f)
参考:https://pythonjishu.com/ugmvrnorpclhikd/
https://zhuanlan.zhihu.com/p/357378479?utm_id=0
浙公网安备 33010602011771号