epub转txt
from bs4 import BeautifulSoup
def exact_p_tag(path, f):
    """Append the text of every ``<p>`` element in one XHTML file to *f*.

    Args:
        path: Location of the XHTML file to read; opened as UTF-8 text.
        f: Writable text-mode file object that receives the output. It is
            not closed here; the caller owns it.

    Each paragraph's text is written followed by a newline, in the order
    ``find_all('p')`` returns the elements.
    """
    # The context manager closes the input even when reading or parsing
    # raises, which a manual close() at the end of the function would not.
    with open(path, 'r', encoding='utf-8') as xhtml_file:
        soup = BeautifulSoup(xhtml_file.read(), 'lxml')
    f.writelines(p.text + '\n' for p in soup.find_all('p'))
import os
from pathlib import Path

# The relative 'EPUB/xhtml' path below is resolved against this directory
# (presumably the unpacked EPUB -- confirm before reusing the script).
os.chdir('C:/Users/tellw/Downloads/test')

# Directory enumeration order is not guaranteed, so sort to keep the output
# deterministic. NOTE: this is plain path ordering; the book's real reading
# order would have to come from the EPUB package file, which is not read here.
xhtml_file_paths = sorted(Path('EPUB/xhtml').glob('*.xhtml'))

# `with` guarantees the output is flushed and closed even if one of the
# chapter files fails to open or parse part-way through the loop.
with open('C:/Users/tellw/test/test.txt', 'w', encoding='utf8') as f:
    for xfp in xhtml_file_paths:
        exact_p_tag(xfp, f)
参考资料:《使用 Python 提取 epub 中的文本》 https://fanlumaster.github.io/2021/07/08/使用-Python-提取-epub-中的文本/
创建于2409071243,修改于2409071243

 
                
            
         
浙公网安备 33010602011771号