坐山雕

导航

GFF2GTF.py2

import sys

inFile = open(sys.argv[1],'r')

for line in inFile:
  #skip comment lines that start with the '#' character
  if line[0] != '#':
    #split line into columns by tab
    data = line.strip().split('\t')

    ID = ''

    #if the feature is a gene 
    if data[2] == "gene":
      #get the id
      ID = data[-1].split('ID=')[-1].split(';')[0]

    #if the feature is anything else
    else:
      # get the parent as the ID
      ID = data[-1].split('Parent=')[-1].split(';')[0]
    
    #modify the last column
    data[-1] = 'gene_id "' + ID + '"; transcript_id "' + ID

    #print out this new GTF line
    print '\t'.join(data)

https://www.jianshu.com/p/c284a6b4e1c6

posted on 2021-04-22 17:25  坐山雕  阅读(53)  评论(0编辑  收藏  举报