C语言计算fastq文件GC含量
C语言小练习:计算非压缩fastq格式的GC含量
1 #include <stdio.h> 2 #include <stdlib.h> 3 #include <string.h> 4 #define buff 1024 5 6 typedef unsigned long long int u_llong; 7 8 static void usage(int num,const char *str) 9 { 10 if(num !=2) 11 { 12 fprintf(stderr,"usage: %s fqFile\n",str); 13 exit(0); 14 } 15 } 16 17 static u_llong* gcN(char base[buff]) 18 { 19 base[strlen(base)-1]='\0'; 20 21 int i; 22 static u_llong gactn[]={0,0,0,0,0}; 23 for(i=0; i<strlen(base); i++) 24 { 25 if(base[i]=='G') 26 gactn[0]++; 27 if(base[i]=='A') 28 gactn[1]++; 29 if(base[i]=='C') 30 gactn[2]++; 31 if(base[i]=='T') 32 gactn[3]++; 33 if(base[i]=='N') 34 gactn[4]++; 35 } 36 return gactn; 37 } 38 39 static void calc(const char *fqfile) 40 { 41 FILE *fq; 42 if((fq=fopen(fqfile,"r")) == NULL) 43 { 44 perror("fopen"); 45 exit(1); 46 } 47 //fprintf(stderr,"fq file <%s> open suceed!\n",fqfile); 48 49 char base[buff]; 50 char qual=0; 51 u_llong *p=NULL; 52 while((fgets(base,buff,fq))!= NULL) 53 { 54 if(base[0]=='@') 55 { 56 continue; 57 } 58 if(base[0]=='+') 59 { 60 qual=1; 61 continue; 62 } 63 if(qual==1) 64 { 65 qual=0; 66 continue; 67 } 68 69 p=gcN(base); // G A C T N 70 } 71 72 float GClevel,Nlevel; 73 u_llong sum=0; 74 for(int i=0; i<5; i++) 75 { 76 sum+=*(p+i); 77 } 78 GClevel=(float)(*p+*(p+2)) / sum * 100; 79 Nlevel=(float)(*p+4) / sum * 100; 80 81 fprintf(stdout,"G:%lld\tA:%lld\nC:%lld\tT:%lld\nN:%lld\tsum:%lld\n",*p,*(p+1),*(p+2),*(p+3),*(p+4),sum); 82 fprintf(stdout,"GC:%.2f%%\n",GClevel); 83 84 fclose(fq); 85 } 86 87 int main(int argc,const char *argv[]) 88 { 89 usage(argc,argv[0]); 90 calc(argv[1]); 91 92 exit(0); 93 }
作者:un-define
本文版权归作者和博客园共有,欢迎转载,但未经作者同意必须保留此段声明,且在文章页面明显位置给出原文连接,否则保留追究法律责任的权利.