# 识别与读取PDF文件

from io import StringIO
from pdfminer.pdfinterp import PDFResourceManager,process_pdf
from pdfminer.converter import TextConverter
from pdfminer.layout import LAParams


# Extract the text of a PDF file and print it to stdout.
#
# The file handle and the in-memory buffer are managed by ``with`` and the
# converter is closed in a ``finally`` clause, so nothing is leaked when
# parsing raises part-way through.

# Location of the PDF to read; edit this to point at your own file.
pdf_path = r'C:/Users/13375/Desktop/python/pdf测试.pdf'

rsrcmgr = PDFResourceManager()
laparams = LAParams()

with open(pdf_path, 'rb') as pdf_file, StringIO() as retstr:
    # The converter is given ``retstr`` as its output stream; the text is
    # read back from that buffer below.
    device = TextConverter(rsrcmgr=rsrcmgr, outfp=retstr, laparams=laparams)
    try:
        process_pdf(rsrcmgr=rsrcmgr, device=device, fp=pdf_file)
    finally:
        device.close()
    # Read the buffer before the ``with`` block closes it.
    content = retstr.getvalue()

print(content)

posted @ 2020-02-21 20:40  悠悠的奶茶  阅读(832)  评论(0)    收藏  举报