# encoding=utf8
#-*-coding:utf-8 -*-

#pip install  pypdf2 -i https://pypi.tuna.tsinghua.edu.cn/simple

import PyPDF2
from io import StringIO


# Extracted text of the PDF, one entry per page, in page order.
content_all_list = []

# Open the PDF in binary mode and hand the file object to a PyPDF2 reader.
# All page access happens while the file is still open.
with open('Scrum-Guide-Chinese-Simplified.pdf', 'rb') as pdf_file:
    reader = PyPDF2.PdfReader(pdf_file)

    # Walk through every page of the document (not just the first one).
    for page in reader.pages:
        # Pull the text out of the current page, echo it to stdout,
        # and keep it for the file dump performed later in the script.
        page_text = page.extract_text()
        print(page_text)
        content_all_list.append(page_text)

# Show the complete list of collected page texts.
print(content_all_list)

# Write the extracted text to a new txt file.
# Original author's note: print() defaults to gbk on Windows 7, and gbk cannot
# represent every character, so gb18030 is used as the file encoding here.
# Mode 'w' instead of 'a': the output is meant to be a fresh file; appending
# would duplicate the entire document every time the script is re-run.
with open('Scrum-Guide-Chinese-Simplified.txt', 'w', encoding='gb18030') as txt_file:
    # One batched call writes every page's text in order, with no separator
    # added between pages (same layout the per-item write loop produced).
    txt_file.writelines(content_all_list)

 

# Source: blog post by 大话人生, posted on 2023-04-29 19:26 (page footer: 297 views, 0 comments).