2月份测试使用

import html
import re

# Defaults reproduce the values the script originally hard-coded.
INPUT_PATH = "1.txt"
OUTPUT_PATH = "extracted_texts.txt"
TARGET_CLASS = "c08e6e93"


def extract_texts(content: str, class_name: str = TARGET_CLASS) -> list:
    """Return the text that directly follows each tag with ``class="<class_name>"``.

    The markup is expected to look like
    ``<div class="c08e6e93">text to extract</div>``: only the run of
    characters between the end of the opening tag and the next ``<`` is
    captured, so text inside nested child tags is not collected.

    Args:
        content: Markup to scan.
        class_name: Exact value of the ``class`` attribute to look for.

    Returns:
        The captured texts in document order, with HTML character references
        decoded and surrounding whitespace stripped. Captures that are empty
        after stripping are dropped.
    """
    # re.escape keeps a class name containing regex metacharacters literal.
    pattern = rf'class="{re.escape(class_name)}"[^>]*>([^<]+)<'
    # Decode before stripping so that entities such as &nbsp; at the edges
    # are removed together with ordinary whitespace.
    cleaned = (html.unescape(raw).strip() for raw in re.findall(pattern, content))
    # A whitespace-only capture (e.g. the indentation before a nested tag)
    # would otherwise be written as a blank line and counted as a record.
    return [text for text in cleaned if text]


def main(input_path: str = INPUT_PATH, output_path: str = OUTPUT_PATH) -> None:
    """Extract the tagged texts from ``input_path`` into ``output_path``.

    Reads the input file as UTF-8, writes one extracted text per line to the
    output file (UTF-8, overwritten if it exists), and prints how many
    records were written.

    Raises:
        OSError: If the input cannot be read or the output cannot be written.
    """
    # Read the whole input file.
    with open(input_path, "r", encoding="utf-8") as source_file:
        content = source_file.read()

    texts = extract_texts(content)

    # Save one record per line.
    with open(output_path, "w", encoding="utf-8") as output_file:
        output_file.writelines(f"{text}\n" for text in texts)

    print(f"共提取 {len(texts)} 条记录,已保存到 {output_path}")


if __name__ == "__main__":
    main()

posted @ 2026-02-05 13:38  lvsuifeng  阅读(5)  评论(0)    收藏  举报