分割表型数据文件-(genotype,phenotype)

# cat split_file.py,分割表型文件
import csv
import sys

def _check_trait_names(traits):
    """Reject trait names that cannot be used as distinct output file names.

    Raises:
        ValueError: if a trait name is blank or appears more than once.
    """
    seen = set()
    for trait in traits:
        if not trait.strip():
            # The header doubles as the output file name, so it cannot be blank.
            raise ValueError("header row contains an empty trait name")
        if trait in seen:
            # Duplicates would be merged into a single output file.
            raise ValueError(f"duplicate trait name in header row: {trait!r}")
        seen.add(trait)


def split_phenotype_file(input_file, trait_file_name, delimiter=','):
    """Split a multi-trait phenotype table into one two-column file per trait.

    The first row of ``input_file`` is the header and the first column of
    every row is the sample identifier.  For each remaining header a file
    named exactly after that header is written in the current working
    directory; each of its lines holds ``<sample id>`` and ``<trait value>``
    separated by a tab.  The names of the created files are then written,
    one per line, to ``trait_file_name``.

    Args:
        input_file: path of the delimited phenotype table to read.
        trait_file_name: path of the file that receives the list of
            generated file names.
        delimiter: field separator of ``input_file`` (comma by default).

    Returns:
        The list of created file names, in header order.

    Raises:
        ValueError: if the input has no header row, a trait name is blank or
            duplicated, or a data row has fewer fields than the header.
        OSError: if a file cannot be opened, read or written.
    """
    # newline='' is what the csv module expects of the files it is handed;
    # it then performs its own newline handling.
    with open(input_file, 'r', newline='') as infile:
        reader = csv.reader(infile, delimiter=delimiter)
        headers = next(reader, None)  # first row is the header
        if not headers:
            raise ValueError(f"{input_file}: no header row found")

        traits = headers[1:]
        _check_trait_names(traits)
        columns = {trait: [] for trait in traits}

        for row in reader:
            if not row:
                continue  # blank line, e.g. trailing newline at end of file
            if len(row) < len(headers):
                raise ValueError(
                    f"{input_file}: line {reader.line_num} has {len(row)} "
                    f"field(s), expected at least {len(headers)}"
                )
            sample_id = row[0]
            # Fields beyond the header width are ignored by zip().
            for trait, value in zip(traits, row[1:]):
                columns[trait].append((sample_id, value))

    # Write one tab-separated file per trait, named after the trait.
    new_file_names = []
    for trait, rows in columns.items():
        new_file_names.append(trait)
        with open(trait, 'w', newline='') as outfile:
            csv.writer(outfile, delimiter='\t').writerows(rows)

    # Record the generated file names in the trait list file.
    with open(trait_file_name, "w") as trait_file:
        trait_file.writelines(f"{name}\n" for name in new_file_names)

    return new_file_names


def main(argv=None):
    """Command-line entry point: ``script.py <input_file> <trait_file>``.

    Exits with status 1 when arguments are missing or the split fails.
    """
    argv = sys.argv if argv is None else argv

    # Both the input file name and the trait file name are required.
    if len(argv) < 3:
        print("Usage: python script.py <input_file.txt> <trait_file.txt>")
        sys.exit(1)

    input_file = argv[1]
    trait_file_name = argv[2]

    try:
        split_phenotype_file(input_file, trait_file_name)
    except (OSError, ValueError) as err:
        print(f"Error: {err}", file=sys.stderr)
        sys.exit(1)

    print(f"Files have been successfully created and new file names have been written to {trait_file_name}.")


if __name__ == "__main__":
    main()
posted on 2024-01-20 12:38  燕子南飞0415  阅读(44)  评论(0)    收藏  举报