![]()
![]()
#!/usr/bin/env python
# -*- coding:utf-8 -*-
"""
@author: Suyue
@file: hhh.py
@time: 2025/04/02
@desc:雨滴谱数据归一化处理
"""
import os
import numpy as np
def _parse_raindrop_file(filepath):
    """Parse one raindrop-spectrum text file into timestamps and values.

    Blank lines and lines starting with "Sequence from" are skipped, as are
    lines with fewer than two whitespace-separated fields or whose last
    field cannot be converted to ``float``.

    Args:
        filepath: Path of the text file to read.

    Returns:
        A ``(timestamps, concentrations)`` tuple of equal-length lists. Each
        timestamp is every field except the last, re-joined with single
        spaces; each concentration is the last field as a ``float``.
    """
    timestamps = []
    concentrations = []
    # utf-8-sig decodes plain UTF-8 identically but drops a leading BOM, so
    # the BOM can neither end up in the first timestamp nor hide a leading
    # "Sequence from" header line.
    with open(filepath, 'r', encoding='utf-8-sig') as file:
        for raw_line in file:
            line = raw_line.strip()
            if not line or line.startswith("Sequence from"):
                continue
            parts = line.split()
            if len(parts) < 2:
                continue
            try:
                # The value is always the last field on the line.
                concentration = float(parts[-1])
            except ValueError:
                # Not a data row (e.g. a header); ignore it.
                continue
            timestamps.append(' '.join(parts[:-1]))
            concentrations.append(concentration)
    return timestamps, concentrations


def process_raindrop_files(folder_path):
    """Min-max normalize the number concentrations of all .txt files in a folder.

    Every regular file in ``folder_path`` whose name ends with ``.txt`` is
    parsed once. The minimum and maximum are taken over the values of all
    files together, and each value is mapped to
    ``(value - global_min) / (global_max - global_min)``. When all values are
    equal, every normalized value is ``0.0``.

    The global minimum/maximum and the first five data points of each file
    are printed for verification.

    Args:
        folder_path: Path of the folder containing the raindrop-spectrum
            .txt files.

    Returns:
        A dict keyed by file name (in sorted order). Each value is a dict
        with the keys ``'timestamps'``, ``'original_concentrations'`` and
        ``'normalized_concentrations'``, holding parallel lists. Returns
        ``None`` when no valid value is found in any file.
    """
    # Sort for a deterministic processing order (os.listdir order is
    # arbitrary) and skip directories that merely happen to end in ".txt".
    file_list = sorted(
        name for name in os.listdir(folder_path)
        if name.endswith('.txt')
        and os.path.isfile(os.path.join(folder_path, name))
    )

    # Parse each file exactly once; the parsed rows serve both the global
    # min/max computation and the per-file normalization below.
    parsed = {
        name: _parse_raindrop_file(os.path.join(folder_path, name))
        for name in file_list
    }

    all_concentrations = []
    for _, concentrations in parsed.values():
        all_concentrations.extend(concentrations)

    if not all_concentrations:
        print("未找到有效的数浓度数据")
        return None

    global_min = min(all_concentrations)
    global_max = max(all_concentrations)
    print(f"全局最小数浓度: {global_min}, 全局最大数浓度: {global_max}")

    all_data = {}
    for filename in file_list:
        timestamps, concentrations = parsed[filename]
        if global_max != global_min:  # avoid division by zero
            span = global_max - global_min
            normalized_data = [(c - global_min) / span for c in concentrations]
        else:
            normalized_data = [0.0] * len(concentrations)

        all_data[filename] = {
            'timestamps': timestamps,
            'original_concentrations': concentrations,
            'normalized_concentrations': normalized_data
        }

        # Print a small sample so the result can be checked by eye.
        print(f"\n文件: {filename}")
        print("前5个数据点:")
        sample = zip(timestamps[:5], concentrations[:5], normalized_data[:5])
        for timestamp, original, normalized in sample:
            print(f"时间: {timestamp}, 原始值: {original:.4f}, 归一化值: {normalized:.4f}")
    return all_data
def save_normalized_data(processed_data, output_folder):
    """Write the normalized values of each processed file to a new text file.

    For every entry of ``processed_data`` a file named
    ``normalized_<filename>`` is written to ``output_folder`` (UTF-8). Each
    line holds a timestamp and its normalized value formatted with six
    decimal places, separated by a single space.

    Args:
        processed_data: Dict keyed by file name whose values provide the
            parallel lists ``'timestamps'`` and
            ``'normalized_concentrations'``.
        output_folder: Destination folder; created, including missing
            parents, if it does not exist yet.
    """
    # exist_ok=True avoids the check-then-create race of a separate
    # os.path.exists() test.
    os.makedirs(output_folder, exist_ok=True)
    for filename, data in processed_data.items():
        output_path = os.path.join(output_folder, f"normalized_{filename}")
        rows = zip(data['timestamps'], data['normalized_concentrations'])
        with open(output_path, 'w', encoding='utf-8') as f:
            f.writelines(
                f"{timestamp} {norm_val:.6f}\n" for timestamp, norm_val in rows
            )
# Usage example. Guarded so that importing this module does not touch the
# file system; running the file as a script behaves as before.
if __name__ == "__main__":
    folder_path = "F:/lianxi2/"  # replace with the path of your data folder
    processed_data = process_raindrop_files(folder_path)
    # Save the normalized data (optional); skipped when nothing was processed.
    if processed_data:
        output_folder = "F:/lianxi2/normalized_results"
        save_normalized_data(processed_data, output_folder)