python txt文件按照"Sequence from"开头的行进行分割

将原txt文件按照"Sequence from"开头的行进行分割,并将每个分割后的部分保存为独立的txt文件

在我的原txt文件中,每个以"Sequence from"开头的行都标志着一次降水事件的开始

有的txt文件有多个"Sequence from"开头的行

目标:以"Sequence from"开头的行作为分割点,每出现一次这样的行,就把它及其后面的内容保存为一个新的txt文件

import os
from pathlib import Path


def process_files(input_folder, output_folder):
    """Split every ``.txt`` file found directly inside *input_folder*.

    The output folder is created if it does not exist (including missing
    parents). Each regular file whose name ends with ``.txt`` is handed to
    ``split_sequences``, which writes its results into *output_folder*.
    Sub-folders of *input_folder* are not searched.

    Args:
        input_folder: Folder containing the txt files to process.
        output_folder: Folder that receives all of the split files.
    """
    # Make sure the output folder exists.
    Path(output_folder).mkdir(parents=True, exist_ok=True)

    # os.listdir returns entries in arbitrary order; sorting keeps the
    # processing order (and the log output) reproducible between runs.
    for filename in sorted(os.listdir(input_folder)):
        if not filename.endswith('.txt'):
            continue

        input_file = os.path.join(input_folder, filename)
        # A directory may also carry a ".txt" name; trying to open it as a
        # file would raise, so only regular files are processed.
        if not os.path.isfile(input_file):
            continue

        print(f"Processing file: {filename}")

        # Process a single file, saving the results straight to the output folder.
        split_sequences(input_file, output_folder, filename)


def split_sequences(input_file, output_folder, original_filename):
    """Split one text file into chunks that each start at a "Sequence from" line.

    Every line is stripped of leading and trailing whitespace. A stripped
    line that starts with "Sequence from" opens a new chunk and becomes its
    header; the non-empty lines that follow are collected as the chunk's
    content. Blank lines are dropped, and any lines that appear before the
    first header are discarded. Each finished chunk is passed to
    ``save_sequence`` together with a zero-based running index.

    Args:
        input_file: Path of the text file to read.
        output_folder: Folder handed to ``save_sequence`` for the output.
        original_filename: File name whose extension-less stem is used as
            the base name of the generated files.
    """
    with open(input_file, 'r') as src:
        header = None
        body = []
        saved = 0
        stem, _ext = os.path.splitext(original_filename)  # drop the extension

        for raw in src:
            text = raw.strip()

            if not text.startswith("Sequence from"):
                # Ordinary data line: keep it unless it is blank.
                if text:
                    body.append(text)
                continue

            # A new header closes the chunk collected so far, if there is one.
            if header is not None:
                save_sequence(output_folder, stem, saved, header, body)
                saved += 1
            header, body = text, []

        # Flush the chunk that was still open when the file ended.
        if header is not None:
            save_sequence(output_folder, stem, saved, header, body)


def save_sequence(output_folder, base_name, count, header, content):
    """Write one sequence to ``<base_name>_<NNN>.txt`` inside *output_folder*.

    The file holds the header line followed by one content line per entry.
    Every line, including the last one, is terminated by a newline so the
    file never ends in the middle of a line.

    Args:
        output_folder: Existing folder in which the file is created.
        base_name: Stem used for the generated file name.
        count: Zero-based index of the sequence; the file name uses
            ``count + 1`` zero-padded to three digits.
        header: The "Sequence from ..." line that opens the sequence.
        content: Iterable of the data lines belonging to the sequence.
    """
    # File name format: <original name>_<three-digit number>.txt
    filename = f"{base_name}_{count + 1:03d}.txt"
    filepath = os.path.join(output_folder, filename)

    with open(filepath, 'w') as f:
        # One write per line with an explicit terminator; joining with "\n"
        # would leave the final data line without a trailing newline.
        f.writelines(f"{line}\n" for line in (header, *content))

    print(f"  Saved: {filename}")


# Example usage
if __name__ == "__main__":
    # Folder holding every txt file that should be processed.
    input_folder = "F:/lianxi2"
    # Single folder that receives all of the split results.
    output_folder = "F:/lianxi2/normalized_results"

    process_files(input_folder=input_folder, output_folder=output_folder)
    print("\nAll files processed successfully!")

 

posted @ 2025-05-09 10:51  秋刀鱼CCC  Views(15)  Comments(0)    收藏  举报