python中实现DNA序列一致性计算
001、
# Shell session that sets up the example (kept for context):
#
#   [root@pc1 test01]# ls
#   a.fa  test.py
#   [root@pc1 test01]# cat a.fa          ## test FASTA file
#   >Rosalind_1
#   ATCCAGCT
#   >Rosalind_2
#   GGGCAACT
#   >Rosalind_3
#   ATGGATCT
#   >Rosalind_4
#   AAGCAACC
#   >Rosalind_5
#   TTGGAACT
#   >Rosalind_6
#   ATGCCATT
#   >Rosalind_7
#   ATGGCACT
#   [root@pc1 test01]# cat test.py       ## the counting program
#
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Print the consensus string and the per-base count profile of a FASTA file.

The first output line is the consensus sequence; the following lines give,
for each base in ``BASES``, its count at every position.
"""

# Row order of the profile matrix; also the tie-break order for the consensus.
BASES = "ACGT"


def read_sequences(path):
    """Yield the sequence string of every record in the FASTA/FASTQ file `path`.

    The file is opened with ``pysam.FastxFile`` inside a ``with`` block so the
    handle is closed once iteration finishes.
    """
    # Imported lazily so the pure helpers below can be used without pysam.
    import pysam

    with pysam.FastxFile(path) as fasta:
        for record in fasta:
            yield record.sequence


def build_profile(sequences, bases=BASES):
    """Count how often each base occurs at each position.

    Args:
        sequences: iterable of sequence strings.
        bases: alphabet; one profile row is created per character.

    Returns:
        A list with one row per base, where ``profile[r][c]`` is the number
        of sequences having ``bases[r]`` at position ``c``.  The row width is
        fixed by the first sequence.  An empty list if there are no sequences.

    Raises:
        ValueError: a sequence contains a character that is not in `bases`.
        IndexError: a later sequence is longer than the first one.
    """
    profile = []
    for seq in sequences:
        if not profile:
            # Size the matrix from the first sequence seen.
            profile = [[0] * len(seq) for _ in bases]
        for pos, nucleotide in enumerate(seq):
            profile[bases.index(nucleotide)][pos] += 1
    return profile


def consensus(profile, bases=BASES):
    """Return the most frequent base of every column as one string.

    Ties go to the base that comes first in `bases`, because ``index`` returns
    the first position of the maximum.  An empty profile gives ``""``.
    """
    # zip(*profile) transposes the matrix: one tuple of counts per position.
    return "".join(
        bases[column.index(max(column))] for column in zip(*profile)
    )


def format_profile(profile, bases=BASES):
    """Return one ``"<base>:<counts>"`` line per base.

    The counts are concatenated without a separator, so the line becomes
    ambiguous once any count reaches 10.  An empty profile yields lines with
    a label and no counts.
    """
    rows = profile if profile else [[] for _ in bases]
    return [
        base + ":" + "".join(str(count) for count in row)
        for base, row in zip(bases, rows)
    ]


def main(path="a.fa"):
    """Read `path` and print the consensus followed by the profile lines."""
    profile = build_profile(read_sequences(path))
    print(consensus(profile))
    for line in format_profile(profile):
        print(line)


if __name__ == "__main__":
    main()
[root@pc1 test01]# ls
a.fa  test.py
[root@pc1 test01]# python3 test.py    ## 计算结果
ATGCAACT
A:51005500
C:00142061
G:11630100
T:15000116

。

浙公网安备 33010602011771号