1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66
|
/* fastal.l - FASTA sequence lexer */
%{
/*
 * C prologue: copied verbatim into the generated scanner source.
 *
 * config.h is pulled in only when the build defines HAVE_CONFIG_H, and the
 * standard headers only when STDC_HEADERS is defined. <string.h> is where
 * strdup(), used by the rule actions, is declared on POSIX systems.
 */
#if defined(HAVE_CONFIG_H)
# include <config.h>
#endif

#if defined(STDC_HEADERS)
# include <stdlib.h>
# include <string.h>
#endif

/* Presumably the parser header that supplies the token codes and the
 * semantic value type used by the actions -- TODO confirm. */
#include "sequence/fastay.h"
%}
/*
 * Scanner options:
 *   bison-bridge       yylex() receives a pointer to the semantic value,
 *                      which is why the actions write through yylval->str.
 *   never-interactive  the scanner does not probe whether input is a tty.
 *   noinput, nounput   do not generate the unused input()/unput() helpers.
 *   noyywrap           end of input is final; no yywrap() is called.
 */
%option bison-bridge never-interactive
%option noinput nounput
%option noyywrap
/*
 * Exclusive start conditions (rules without a <...> prefix are inactive
 * while one of these is selected):
 *   HEAD  inside a header line, right after the leading '>'
 *   DESC  inside the free-text part of a header line
 *   SEQS  inside the sequence lines that follow a header
 */
%x HEAD
%x DESC
%x SEQS
/* POSIX character classes. */
alp     [[:alpha:]]
blk     [[:blank:]]

/* Single-character literals used to structure a record.
 * dot, pip and tab are defined but not referenced by any rule below. */
sup     ">"
spc     " "
tab     "\t"
eol     "\n"
sem     ";"
pip     "|"
dot     "."

/* Non-letter symbols accepted inside sequence data. */
sta     "*"
min     "-"
til     "~"
gap     ({min}|{til})

/* nam: one chunk (1 to 80 characters) of a header name; stops at a space
 *      or a newline. Longer names are delivered as several chunks. */
nam     [^ \n]{1,80}

/* txt: one chunk (1 to 80 characters) of header text; '.' never matches
 *      a newline, so a chunk cannot cross a line boundary. */
txt     .{1,80}

/* bas: one chunk (1 to 80 symbols) of sequence data: letters, '*', '-'
 *      or '~'. */
bas     ({alp}|{sta}|{gap}){1,80}

/* nbr: an NBRF-style header body: a two-character code (P1, F1, DL, DC,
 *      RL, RC, N1 or N3), a ';', then at least one character that is
 *      neither a space nor a newline.
 *      NOTE(review): the NBRF/PIR format is believed to also define an
 *      'XX' code, which this pattern does not cover -- confirm whether
 *      that omission is intentional. */
nbr     ([PF]1|[DR][LC]|N[13]){sem}[^ \n]+
%%
^{blk}*{eol} ; /* Empty lines ignored */
^{sup} { BEGIN HEAD; return SUP; }
<HEAD>{nbr}/{eol} { return ERR; } /* FIXME: Reject NBRF header */
<HEAD>{nam} { yylval->str = strdup(yytext); return NAM; }
<HEAD>{spc}+ { BEGIN DESC; return SPC; }
<HEAD>{eol} { BEGIN SEQS; return EOL; }
<DESC>{txt} { yylval->str = strdup(yytext); return TXT; }
<DESC>{eol} { BEGIN SEQS; return EOL; }
<SEQS>{bas} { yylval->str = strdup(yytext); return BAS; }
<SEQS>{blk} ; /* Spaces/Tabs ignored */
<SEQS>{eol} ; /* Newlines ignored */
<SEQS>{eol}/{sup} { BEGIN INITIAL; return END; }
<SEQS><<EOF>> { BEGIN INITIAL; return END; }
<SEQS>. { return ERR; }
<<EOF>> { return NUL; }
{eol} { return ERR; }
. { return ERR; }
%%
|