python:txt数据批量归一化处理(每行为时间戳+数值)

 

#!/usr/bin/env python
# -*- coding:utf-8 -*-
"""
@author: Suyue
@file: hhh.py
@time: 2025/04/02
@desc:雨滴谱数据归一化处理
"""
import os
import numpy as np


def _parse_data_line(line):
    """Parse one raw line into a (timestamp, concentration) pair.

    Returns None for blank lines, "Sequence from" header lines, lines with
    fewer than two whitespace-separated fields, and lines whose last field
    is not a valid float.  The timestamp is everything except the last token.
    """
    line = line.strip()
    if not line or line.startswith("Sequence from"):
        return None
    parts = line.split()
    if len(parts) < 2:
        return None
    try:
        # Last token is the numeric concentration; the rest is the timestamp.
        return ' '.join(parts[:-1]), float(parts[-1])
    except ValueError:
        return None


def process_raindrop_files(folder_path):
    """Min-max normalize raindrop-spectrum concentration data across files.

    Reads every .txt file in *folder_path* (each data line looks like
    "timestamp ... value"), computes the global min/max concentration over
    ALL files, then normalizes each value to [0, 1] using that global range,
    so values from different files remain comparable.

    Parameters:
        folder_path: path to the folder containing the raindrop .txt files.

    Returns:
        A dict mapping filename -> {'timestamps', 'original_concentrations',
        'normalized_concentrations'}, or None when no valid data was found.
    """
    all_data = {}
    file_list = [f for f in os.listdir(folder_path) if f.endswith('.txt')]

    # First pass: collect every concentration to establish the global range.
    all_concentrations = []
    for filename in file_list:
        filepath = os.path.join(folder_path, filename)
        with open(filepath, 'r', encoding='utf-8') as file:
            for line in file:
                parsed = _parse_data_line(line)
                if parsed is not None:
                    all_concentrations.append(parsed[1])

    if not all_concentrations:
        print("未找到有效的数浓度数据")
        return None

    global_min = min(all_concentrations)
    global_max = max(all_concentrations)
    print(f"全局最小数浓度: {global_min}, 全局最大数浓度: {global_max}")

    # Hoisted loop invariant; zero when all concentrations are identical.
    value_range = global_max - global_min

    # Second pass: re-read each file and normalize with the global range.
    for filename in file_list:
        filepath = os.path.join(folder_path, filename)
        timestamps = []
        concentrations = []
        normalized_data = []

        with open(filepath, 'r', encoding='utf-8') as file:
            for line in file:
                parsed = _parse_data_line(line)
                if parsed is None:
                    continue
                timestamp, concentration = parsed
                # Guard against division by zero when min == max.
                if value_range:
                    normalized = (concentration - global_min) / value_range
                else:
                    normalized = 0.0
                timestamps.append(timestamp)
                concentrations.append(concentration)
                normalized_data.append(normalized)

        all_data[filename] = {
            'timestamps': timestamps,
            'original_concentrations': concentrations,
            'normalized_concentrations': normalized_data
        }

        # BUG FIX: the original printed the literal text "(unknown)" here
        # instead of interpolating the current filename.
        print(f"\n文件: {filename}")
        print("前5个数据点:")
        for i in range(min(5, len(timestamps))):
            print(f"时间: {timestamps[i]}, 原始值: {concentrations[i]:.4f}, 归一化值: {normalized_data[i]:.4f}")

    return all_data


def save_normalized_data(processed_data, output_folder):
    """Write normalized data to ``normalized_<filename>`` files.

    Parameters:
        processed_data: dict as returned by ``process_raindrop_files`` —
            filename -> {'timestamps', 'normalized_concentrations', ...}.
        output_folder: destination directory (created if missing).

    Each output line is "timestamp value" with the value at 6 decimals.
    """
    # exist_ok avoids the race-prone exists()/makedirs() pair.
    os.makedirs(output_folder, exist_ok=True)

    for filename, data in processed_data.items():
        # BUG FIX: the original built the literal name "normalized_(unknown)",
        # so every input file overwrote the same output file.
        output_path = os.path.join(output_folder, f"normalized_{filename}")
        with open(output_path, 'w', encoding='utf-8') as f:
            for timestamp, norm_val in zip(data['timestamps'], data['normalized_concentrations']):
                f.write(f"{timestamp} {norm_val:.6f}\n")


# Example usage — guarded so importing this module does not trigger file I/O.
if __name__ == "__main__":
    folder_path = "F:/lianxi2/"  # replace with your folder path
    processed_data = process_raindrop_files(folder_path)

    # Save the normalized data (optional).
    if processed_data:
        output_folder = "F:/lianxi2/normalized_results"
        save_normalized_data(processed_data, output_folder)

 

posted @ 2025-05-07 17:27  秋刀鱼CCC  Views(45)  Comments(0)    收藏  举报