Python将HTML转MD

1.下载html2text模块(pip install html2text)

2.代码

import os
import html2text

# Batch-convert every .html file in input_folder to a Markdown file of the
# same base name in output_folder.
input_folder = "C:\\Users\\jude\\Desktop\\res\\cnblogs_blog_judes.20240831122513\\judes"  # input folder path
output_folder = "C:\\Users\\jude\\Desktop\\res\\cnblogs_blog_judes.20240831122513\\new"  # output folder path

# open(..., "w") does not create missing directories, so make sure the
# output folder exists before writing anything into it.
os.makedirs(output_folder, exist_ok=True)

# Create the html2text converter instance.
converter = html2text.HTML2Text()
converter.body_width = 0  # do not wrap lines

# Walk over every entry in the input folder.
for filename in os.listdir(input_folder):
    if not filename.endswith(".html"):
        continue

    input_path = os.path.join(input_folder, filename)
    # Skip directories (or other non-files) that merely end in ".html".
    if not os.path.isfile(input_path):
        continue

    # Swap only the trailing extension; str.replace would also rewrite any
    # ".html" that appears earlier in the file name.
    stem = os.path.splitext(filename)[0]
    output_path = os.path.join(output_folder, stem + ".md")

    # Read the HTML file content.
    with open(input_path, "r", encoding="utf-8") as src:
        html_content = src.read()

    # Convert the HTML to Markdown.
    markdown_content = converter.handle(html_content)

    # Write the Markdown content to the output file.
    with open(output_path, "w", encoding="utf-8") as dst:
        dst.write(markdown_content)

    print(f"Converted {input_path} to {output_path}")

 

posted @ 2024-08-31 21:31  朱小勇  阅读(193)  评论(0)    收藏  举报