Python 提取 PDF 中的文字

from pathlib import Path

from pypdf import PdfReader

# Location of the PDF to read, under the current user's home directory.
pdf_path = (
    Path.home()
    / "pydf"
    / "practice_files"
    / "Pride_and_Prejudice.pdf"
)

pdf_reader = PdfReader(pdf_path)

# The extracted text is written next to the user's home directory root.
txt_file = Path.home() / "Pride_and_Prejudice.txt"

# pypdf exposes ``metadata`` as None when the PDF carries no document
# information, and ``title`` as None when that entry is absent. Fall back to
# the file stem so the header line stays meaningful instead of raising
# AttributeError or emitting the literal string "None".
metadata = pdf_reader.metadata
title = metadata.title if metadata is not None else None
if title is None:
    title = pdf_path.stem

# Header lines: document title followed by the page count.
content = [
    f"{title}",
    f"Number of pages: {len(pdf_reader.pages)}",
]

# One entry per page, in document order.
content.extend(page.extract_text() for page in pdf_reader.pages)

# Extracted text often contains non-ASCII characters (curly quotes, dashes);
# write UTF-8 explicitly rather than relying on the platform default
# encoding, which may be unable to represent them.
txt_file.write_text("\n".join(content), encoding="utf-8")
posted @ 2025-08-25 16:22  乐乐乐乐乐乐樂  阅读(9)  评论(0)    收藏  举报
jQuery火箭图标返回顶部代码