# 2月份测试使用 (used for testing in February)
import re
# Read the whole input file into memory.
with open("1.txt", "r", encoding="utf-8") as file:
    content = file.read()

# Use a regular expression to find the text that follows every opening tag
# carrying class="c08e6e93". This assumes the text sits directly inside the
# tag, e.g. <div class="c08e6e93">extracted text</div>. The capture group
# needs at least one character before the next "<", so a tag that is
# immediately followed by another tag produces no match.
pattern = r'class="c08e6e93"[^>]*>([^<]+)<'
matches = re.findall(pattern, content)

# Save the results, one whitespace-trimmed match per line.
with open("extracted_texts.txt", "w", encoding="utf-8") as output_file:
    for text in matches:
        output_file.write(text.strip() + "\n")

# Report the count only after the output file has been closed.
print(f"共提取 {len(matches)} 条记录,已保存到 extracted_texts.txt")

# 浙公网安备 33010602011771号