Check that a file is in FASTA format, using a small Bison parser

reference: https://www.biostars.org/p/42126/

fasta.y

%{
#include <stdio.h>
#include <stdlib.h>
#include <ctype.h>

int yylex();
int yyerror( char* message);
%}
/* Ask Bison for detailed syntax-error messages, e.g.
   "syntax error, unexpected OTHER, expecting LT". */
%error-verbose
/* Token kinds returned by yylex(): LT for '>', CR for '\n', SYMBOL for an
   alphabetic character, OTHER for any other character. */
%token LT OTHER SYMBOL CR
/* Parsing starts at the `input` rule. */
%start input
%%

/* A file is one or more sequence records; CR tokens (blank lines) may
   precede the first record. */
input:   input  sequence | optspaces sequence;
/* A record is a header line, a body, and any number of trailing CR tokens. */
sequence: head body optspaces;
/* Header line: LT, then zero or more non-CR tokens, then CR. */
head: LT anylist CR | LT CR;
/* One or more tokens allowed inside a header line. */
anylist: anylist any | any;
/* Any token kind except CR. */
any: LT | OTHER | SYMBOL;
/* Body: one or more lines, each a non-empty run of SYMBOL tokens terminated
   by CR. Every body line needs its CR, so a last line without a trailing
   newline does not match. */
body: symbols CR | body symbols CR ;
/* One or more SYMBOL tokens. */
symbols: symbols symbol | symbol ;
/* A single sequence character (SYMBOL token). */
symbol: SYMBOL;
/* Optional run of CR tokens; the first alternative is empty. */
optspaces: | crlist;
/* One or more CR tokens. */
crlist: crlist CR | CR;

%%
/*
 * Error reporting hook for the parser.
 *
 * Writes "NOT A FASTA <message>" followed by a newline to stderr and then
 * terminates the process with EXIT_FAILURE, so the first syntax error ends
 * the program.
 *
 * message: text describing the error.
 */
int yyerror(char *message)
{
    fprintf(stderr, "NOT A FASTA %s\n", message);
    exit(EXIT_FAILURE);

    /* Not reached: exit() does not return. Present only because the
       function is declared to return int. */
    return -1;
}
/*
 * Lexer: consumes exactly one character from stdin per call and maps it to
 * a token kind.
 *
 *   EOF            -> returned unchanged
 *   '>'            -> LT
 *   '\n'           -> CR
 *   isalpha(ch)    -> SYMBOL
 *   anything else  -> OTHER (this includes digits, spaces and '\r')
 */
int yylex()
{
    const int ch = fgetc(stdin);

    if (ch == EOF) {
        return ch;
    }
    if (ch == '>') {
        return LT;
    }
    if (ch == '\n') {
        return CR;
    }
    if (isalpha(ch)) {
        return SYMBOL;
    }
    return OTHER;
}

/*
 * Program entry point: runs the parser once and uses the value returned by
 * yyparse() as the process exit status. Command-line arguments are ignored.
 */
int main(int argc, char** argv)
{
    /* The arguments are intentionally unused. */
    (void)argc;
    (void)argv;

    const int status = yyparse();
    return status;
}

#compile
bison fasta.y
gcc -Wall -O3 fasta.tab.c

#test
$ ./a.out < ~/file.xml
NOT A FASTA syntax error, unexpected OTHER, expecting LT

$ ./a.out < ~/rotavirus.fasta
$
posted @ 2016-09-11 15:58  liuhui_pine  阅读(216)  评论(0)  编辑  收藏  举报