# split_file.py: split a phenotype table into one file per trait column.
import csv
import sys
def split_trait_file(input_file, trait_file_name, delimiter=','):
    """Split a delimited table into one two-column file per trait column.

    The first row of ``input_file`` is the header, and the first field of
    every following row is the row identifier. For each remaining header a
    file named exactly after that header is written to the current working
    directory, containing tab-separated ``identifier, value`` rows. The
    names of the created files are then written, one per line, to
    ``trait_file_name``.

    Args:
        input_file: Path of the delimited input table.
        trait_file_name: Path of the file that receives the list of
            created file names.
        delimiter: Field separator of the input table (default ``','``).

    Returns:
        The list of created file names, in header order.

    Raises:
        ValueError: If the input file is empty, or a non-blank row has
            fewer fields than the header.
    """
    # newline='' is what the csv module expects for both reading and
    # writing; without it the writer emits "\r\r\n" on Windows.
    with open(input_file, 'r', newline='') as infile:
        reader = csv.reader(infile, delimiter=delimiter)
        headers = next(reader, None)
        if headers is None:
            raise ValueError(f"Input file {input_file!r} is empty.")

        traits = headers[1:]
        columns = {trait: [] for trait in traits}
        for row in reader:
            if not row:
                # csv.reader yields [] for blank lines (e.g. a trailing
                # empty line); there is nothing to split in them.
                continue
            if len(row) < len(headers):
                raise ValueError(
                    f"{input_file}: line {reader.line_num} has {len(row)} "
                    f"field(s), expected {len(headers)}."
                )
            for index, trait in enumerate(traits, start=1):
                columns[trait].append((row[0], row[index]))

    # One output file per trait, named after the column header itself.
    new_file_names = []
    for trait, rows in columns.items():
        new_file_names.append(trait)
        with open(trait, 'w', newline='') as outfile:
            csv.writer(outfile, delimiter='\t').writerows(rows)

    # Record the created file names in the requested trait list file.
    with open(trait_file_name, "w") as trait_file:
        trait_file.writelines(name + '\n' for name in new_file_names)

    return new_file_names


def main(argv=None):
    """Command-line entry point: ``script.py <input_file.txt> <trait_file.txt>``.

    Args:
        argv: Full argument vector including the program name; defaults to
            ``sys.argv``.
    """
    argv = sys.argv if argv is None else argv
    # Both the input file name and the trait file name are required.
    if len(argv) < 3:
        print("Usage: python script.py <input_file.txt> <trait_file.txt>")
        sys.exit(1)

    input_file = argv[1]
    trait_file_name = argv[2]
    split_trait_file(input_file, trait_file_name)
    print(f"Files have been successfully created and new file names have been written to {trait_file_name}.")


if __name__ == "__main__":
    main()