把文件内容按照染色体分开写出
测试数据如下(保存为 test.txt,每行一条记录):
chr2 43995310 43995986
chr17 49788603 49789067
chr17 59565573 59566163
chr19 8390308 8390745
chr12 49188033 49189033
chr7 974903 975570
chr7 98878532 98879500
chr7 44044672 44045322
chr1 153634052 153634772
chr11 60905850 60906575

直接看代码:
# -*- coding: utf-8 -*-
"""Split a chromosome-prefixed record file into one output file per chromosome."""

import os
import sys
from collections import OrderedDict


def readfasta(filename):
    """Group the records of *filename* by their leading chromosome id.

    Every non-blank line is split on the first run of whitespace into a
    chromosome id and the remainder of the line.

    Args:
        filename: Path of the text file to read.

    Returns:
        An ``OrderedDict`` mapping each chromosome id to a single string that
        holds the remainders of all its lines, one per line (joined with
        ``"\\n"``, no trailing newline). Keys keep first-appearance order.
    """
    grouped = OrderedDict()

    with open(filename) as handle:
        for raw_line in handle:
            # split(None, 1) accepts tabs as well as spaces and ignores
            # leading/trailing whitespace.
            fields = raw_line.split(None, 1)
            if not fields:
                # Blank line: nothing to record (previously an IndexError).
                continue

            rows = grouped.setdefault(fields[0], [])
            if len(fields) > 1:
                rows.append(fields[1].rstrip())

    # Join with newlines so consecutive records of one chromosome are not
    # glued together ("...4978906759565573...").
    return OrderedDict(
        (chr_id, '\n'.join(rows)) for chr_id, rows in grouped.items()
    )


def seperatefile(filename, outfile):
    """Write the records of *filename* into one file per chromosome.

    For an *outfile* of ``output.txt`` and a chromosome ``chr7`` the records
    go to ``output_chr7.txt``. Each output file starts with a line holding the
    chromosome id, followed by that chromosome's records, one per line.

    Args:
        filename: Path of the input file, read with :func:`readfasta`.
        outfile: Template path; the chromosome id is inserted between its
            stem and its extension.
    """
    # The stem and extension do not depend on the chromosome, so compute
    # them once instead of on every iteration.
    name, ext = os.path.splitext(outfile)

    for chr_id, features in readfasta(filename).items():
        with open('%s_%s%s' % (name, chr_id, ext), 'w') as f_out:
            f_out.write('%s\n' % chr_id)
            f_out.write('%s\n' % features)


if __name__ == '__main__':
    # Guarded so importing this module does not touch the file system.
    seperatefile('test.txt', 'output.txt')
推荐论坛:生信技能树,http://biotrainee.com/forum.php/

浙公网安备 33010602011771号