从基因组里面按照染色体和索引提取基因序列

简单而言，就是 pos 要减 1：表中的 pos 是从 1 开始计数的坐标，而 pyfaidx 按 Python 的方式使用从 0 开始的下标。

		pos=row["pos"]-1
		seq = genome[chrom][pos]
from pyensembl import EnsemblRelease
from pyfaidx import Fasta
# Ensembl release 110 annotation handle.
# NOTE(review): not used in the visible part of this script; kept in case
# later code relies on it.
ensembl = EnsemblRelease(110)
# Load the local genome FASTA file.
genome = Fasta('../datasets_make/Homo_sapiens.GRCh38.dna.primary_assembly.fa')

# Sanity check: for every row of `df`, fetch the reference genome sequence at
# (chrom, pos) and compare it with the row's `ref` allele, then print the
# fraction of rows that agree.  `df` must provide the columns "chrom", "pos"
# and "ref"; "pos" is treated as a 1-based coordinate.
sum_num = 0  # rows examined
suc = 0      # rows whose `ref` matches the genome
for _, row in df.iterrows():
    # FASTA record names are strings, so normalise the chromosome label
    # (a numeric column would otherwise not match any record name).
    chrom = str(row["chrom"])
    ref = row["ref"]
    # Convert the 1-based coordinate to a 0-based Python index.
    pos = int(row["pos"]) - 1
    # Fetch as many bases as `ref` contains so that multi-base reference
    # alleles (e.g. deletions) are compared in full, not just the first base.
    seq = genome[chrom][pos:pos + len(ref)]
    if str(seq).upper() == ref.upper():
        suc += 1
    sum_num += 1

# Avoid ZeroDivisionError when `df` has no rows.
if sum_num:
    print(suc / sum_num)
else:
    print("no rows to check")
posted @ 2025-11-25 18:28  ylifs  阅读(5)  评论(0)    收藏  举报