/* embly.y - EMBL sequence parser */
%{
/* Prologue: C declarations needed by the grammar actions and the epilogue. */
#ifdef HAVE_CONFIG_H
# include <config.h>
#endif
/* <stdlib.h> declares free(), which the <str> %destructor below calls. */
#ifdef STDC_HEADERS
# include <stdlib.h>
#endif
#include "sequence.h"
#include "sequence/parse.h"
#include "sequence/embly.h"
/*
 * Scanner entry points, defined outside this file.
 * NOTE(review): presumably a flex scanner generated with an "embl" name
 * prefix -- confirm against the build rules.
 */
void emblset_in(FILE *);
int embllex(YYSTYPE *);
int embllex_destroy(void);
/* Error callback used by the generated parser; defined in the epilogue. */
static void yyerror(sequence_t *, const char *);
/* Entry points of this module; both are defined in the epilogue. */
int embly_check(FILE *);
sequence_t *embly_parse(FILE *);
%}
/*
 * Reentrant ("pure") parser: the scanner receives a pointer to the semantic
 * value as an argument (see the embllex(YYSTYPE *) prototype) instead of
 * communicating through a global variable.
 */
%pure-parser
/*
 * Extra argument of yyparse(): the sequence object handed to the parse_*
 * helpers in the actions. embly_check() passes NULL here.
 */
%parse-param { sequence_t *seq }
/* Semantic values are either a string (str) or an integer (num). */
%union { char *str; int num; }
/*
 * Cleanup for semantic values the parser discards on its own: integers need
 * nothing, strings are released with free().
 */
%destructor {} <num>
%destructor { free($$); } <str>
/* Generic tokens shared by several line types. */
%token EOL ERR INT MIN NUL NUM QUO SEP SPC TER TER2 TXT
%token CBR OBR
/*
 * Line-type tokens and their typed payload tokens.
 * NOTE(review): the names look like the two-letter EMBL line codes; the
 * scanner that produces them is not visible here -- confirm.
 */
%token ID
%token <str> IDNAM
%token AC
%token <str> ACNUM
%token SV
%token <num> SVNUM
%token PR PRDAT
%token DT
%token DE
%token <str> DETXT
%token KW
%token <str> KWORD KWOR2
%token OS
%token <str> OSTXT
%token OC
%token <str> OCNOD OCNO2
%token OG
%token RN
%token RC
%token RP
%token RX RXDB RXID
%token RG
%token RA RAAUT
%token RT
%token RL
%token DR
%token AH
%token AS
%token FH
%token FT
%token CC
%token CO
%token SQ
%token SEQ
%token <str> SEQBAS
%token END
%%
/*
 * Start symbol. A single yyparse() call handles at most one entry:
 *   - annotations, sequence section and end marker -> yyparse() returns 0;
 *   - a lone NUL token                             -> yyparse() returns -1.
 * Both actions return directly out of yyparse() rather than using
 * YYACCEPT / YYABORT, so the generated parser's own exit path is bypassed.
 * NOTE(review): NUL presumably marks end of input -- confirm in the scanner.
 */
embl : annots sequence end { return 0; /*NOTREACHED*/ }
| NUL { return -1; /*NOTREACHED*/ }
;
/* Annotation sections of an entry, in the fixed order the grammar accepts. */
annots : locus access vers proj date desc kwords orgas refs dbref comm asmb feats ;
/* ID line: the IDNAM value is handed to parse_namadd(); trailing TXT is ignored. */
locus : idline ;
idline : ID IDNAM idtxt EOL { parse_namadd(seq, $2); }
;
idtxt : idtxt TXT | ;
/* AC lines (zero or more): every ACNUM in the list goes to parse_accadd(). */
access : access acline | ;
acline : AC aclist TER EOL ;
aclist : aclist SEP ACNUM { parse_accadd(seq, $3); }
| ACNUM { parse_accadd(seq, $1); }
;
/*
 * Optional SV line: only the numeric part ($4) is passed to parse_veradd().
 * NOTE(review): $2 (ACNUM, a <str> value) is neither used nor freed by this
 * action; if the scanner allocates it on this line it is leaked -- confirm.
 */
vers : svline | ;
svline : SV ACNUM SEP SVNUM EOL { parse_veradd(seq, $4); }
;
/* Optional PR line; its contents are recognised but not stored. */
proj : prline | ;
prline : PR prlist TER EOL ;
prlist : prlist SEP PRDAT | PRDAT ;
/* DT lines: either exactly two of them or none; contents are not stored. */
date : dtline dtline | ;
dtline : DT dttxt EOL ;
dttxt : dttxt TXT | TXT ;
/*
 * DE lines (zero or more). On each line the first DETXT goes to
 * parse_dscadd1() and every following DETXT to parse_dscadd2().
 */
desc : desc deline | ;
deline : DE detxt EOL ;
detxt : detxt DETXT { parse_dscadd2(seq, $2); }
| DETXT { parse_dscadd1(seq, $1); }
;
/*
 * KW lines: any number of lines ending in TER2 (kwline1) or with no
 * terminator (kwline3), followed by exactly one line ending in TER (kwline2);
 * or no KW lines at all.
 */
kwords : kwlines kwline2 | ;
kwlines : kwlines kwline1 | kwlines kwline3 | ;
kwline1 : KW kwlist TER2 EOL ;
kwline2 : KW kwlist TER EOL ;
kwline3 : KW kwlist EOL ;
/*
 * Keyword list: KWORD values go to parse_kwdadd(), a leading KWOR2 goes to
 * parse_kwdupd(). NOTE(review): KWOR2 presumably continues a keyword started
 * on the previous line -- confirm in the scanner and parse_kwdupd().
 */
kwlist : kwlist SEP KWORD { parse_kwdadd(seq, $3); }
| KWORD { parse_kwdadd(seq, $1); }
| KWOR2 { parse_kwdupd(seq, $1); }
| /* Empty */
;
/*
 * Organism entries (zero or more). Each entry is one OS line, an optional
 * block of OC lines and any number of OG lines. No action in this section
 * stores anything in seq.
 */
orgas : orgas orgent | ;
orgent : orgspe orgcla orggan ;
orgspe : osline ;
osline : OS ostxt EOL ;
/*
 * NOTE(review): OSTXT is a <str> value that is neither used nor freed here;
 * if the scanner allocates it, it is leaked -- confirm.
 */
ostxt : OSTXT ;
/*
 * OC block: any number of lines ending in TER2 or with no terminator
 * (ocline1), closed by exactly one line ending in TER (ocline2).
 */
orgcla : oclines ocline2 | ;
oclines : oclines ocline1 | ;
ocline1 : OC oclist TER2 EOL | OC oclist EOL ;
ocline2 : OC oclist TER EOL ;
oclist : oclist SEP ocnode | ocnode | ;
/*
 * NOTE(review): OCNOD / OCNO2 are <str> values that are neither used nor
 * freed here; if the scanner allocates them, they are leaked -- confirm.
 */
ocnode : OCNOD | OCNO2 ;
/* OG lines (zero or more); contents are not stored. */
orggan : orggan ogline | ;
ogline : OG ogtxt EOL ;
ogtxt : ogtxt TXT | TXT ;
/*
 * Reference entries (zero or more). The parts of one entry must appear in
 * the order listed in refent. None of the actions in this section store
 * anything in seq; the lines are only checked for structure.
 */
refs : refs refent | ;
refent : refnum refcom refpos refxdb refgrp refaut reftit refloc ;
/* RN line: an integer between OBR and CBR. */
refnum : rnline ;
rnline : RN OBR INT CBR EOL ;
/* RC lines (zero or more). */
refcom : refcom rcline | ;
rcline : RC rctxt EOL ;
rctxt : rctxt TXT | TXT ;
/*
 * RP lines: any number of lines ending in TER (rpline1) followed by exactly
 * one line without a terminator (rpline2); or none. Each value is INT MIN INT.
 */
refpos : rplines rpline2 | ;
rplines : rplines rpline1 | ;
rpline1 : RP rplist TER EOL ;
rpline2 : RP rplist EOL ;
rplist : rplist SEP rpval | rpval ;
rpval : INT MIN INT ;
/* RX lines (zero or more): RXDB SEP RXID closed by TER. */
refxdb : refxdb rxline | ;
rxline : RX RXDB SEP RXID TER EOL ;
/* RG lines (zero or more). */
refgrp : refgrp rgline | ;
rgline : RG rgtxt EOL ;
rgtxt : rgtxt TXT | TXT ;
/*
 * RA lines: mandatory. Any number of lines ending in TER2 followed by
 * exactly one line ending in TER. The author list itself may be empty.
 */
refaut : ralines raline2 ;
ralines : ralines raline1 | ;
raline1 : RA ralist TER2 EOL ;
raline2 : RA ralist TER EOL ;
ralist : ralist SEP RAAUT | RAAUT | ;
/*
 * RT lines: optional. Either a single line (rtline: quoted text or just TER)
 * or a multi-line title: opening line (rtline1), any number of middle lines
 * (rtline2) and a closing line (rtline3).
 */
reftit : rtline1 rtlines rtline3 | rtline | ;
rtlines : rtlines rtline2 | ;
rtline : RT QUO rttxt QUO TER EOL | RT TER EOL ;
rtline1 : RT QUO rttxt EOL ;
rtline2 : RT rttxt EOL ;
rtline3 : RT rttxt QUO TER EOL | RT QUO TER EOL ;
rttxt : rttxt TXT | TXT ;
/* RL lines: one or more. */
refloc : refloc rlline | rlline ;
rlline : RL rltxt EOL ;
rltxt : rltxt TXT | TXT ;
/*
 * Remaining annotation sections. The order in which they may appear in an
 * entry is fixed by the annots rule, not by the order of the rules below.
 * None of these actions store anything in seq.
 */
/* DR lines (zero or more). */
dbref : dbref drline | ;
drline : DR drtxt EOL ;
drtxt : drtxt TXT | TXT ;
/* Feature table: exactly two FH lines followed by one or more FT lines; or nothing. */
feats : feathead featdata | ;
/* Assembly section: one AH line followed by one or more AS lines; or nothing. */
asmb : ahline aslines | ;
ahline : AH ahtxt EOL ;
ahtxt : ahtxt TXT | TXT ;
aslines : aslines asline | asline ;
asline : AS astxt EOL ;
astxt : astxt TXT | TXT ;
/* An FH line may carry no text at all; an FT line needs at least one TXT. */
feathead : fhline fhline ;
fhline : FH fhtxt EOL ;
fhtxt : fhtxt TXT | ;
featdata : featdata ftline | ftline ;
ftline : FT fttxt EOL ;
fttxt : fttxt TXT | TXT ;
/* CC lines (zero or more); a CC line may be empty. */
comm : comm ccline | ;
ccline : CC cctxt EOL ;
cctxt : cctxt TXT | ;
/*
 * Sequence section. Accepted forms:
 *   - CO lines followed by an SQ header and sequence data,
 *   - CO lines only,
 *   - an SQ header and sequence data only.
 */
sequence : seqcont seqhead seqdata | seqcont | seqhead seqdata ;
/* CO lines: one or more; contents are not stored. */
seqcont : seqcont coline | coline ;
coline : CO cotxt EOL ;
cotxt : cotxt TXT | TXT ;
/* SQ header line; contents are not stored. */
seqhead : sqline ;
sqline : SQ sqtxt EOL ;
sqtxt : sqtxt TXT | TXT ;
/*
 * Sequence data lines: one or more. Every SEQBAS chunk on a line is handed
 * to parse_stradd(); the optional trailing "SPC INT" is ignored.
 */
seqdata : seqdata seqline | seqline ;
seqline : SEQ seqlist seqnum EOL ;
seqlist : seqlist SEP SEQBAS { parse_stradd(seq, $3); }
| SEQBAS { parse_stradd(seq, $1); }
;
seqnum : SPC INT | ;
/* Entry terminator line. */
end : END EOL ;
%%
/*
 * embly_check - run the EMBL grammar over the next entry of `f` without
 * building a sequence object (the parser is called with a NULL sequence).
 *
 * Returns the status of yyparse() unchanged: 0 when an entry was accepted,
 * non-zero otherwise. The scanner state is destroyed only on a non-zero
 * status; after a successful parse it is left untouched.
 */
int embly_check(FILE *f) {
  int status;

  emblset_in(f);
  status = yyparse(NULL);
  if (status != 0) {
    /* Parsing stopped abnormally: drop whatever the scanner still holds. */
    embllex_destroy();
  }
  return status;
}
/*
 * embly_parse - parse the next EMBL entry of `f` into a new sequence object.
 *
 * Returns the sequence created by sequence_new() once yyparse() reports
 * success (0). Returns NULL when the sequence could not be created or when
 * yyparse() returns non-zero; in the latter case the scanner state is
 * destroyed and the partially filled sequence is released with
 * sequence_free(). After a successful parse the scanner state is left
 * untouched.
 */
sequence_t *embly_parse(FILE *f) {
  sequence_t *result = sequence_new();

  if (result == NULL) {
    return NULL;
  }

  emblset_in(f);
  if (yyparse(result) != 0) {
    /* Failure path: tear down the scanner and discard the partial result. */
    embllex_destroy();
    sequence_free(result);
    return NULL;
  }

  return result;
}
/* Helpers ... */
/*
 * yyerror - error callback for the generated parser. It does nothing, so no
 * message is produced here; callers in this file detect failure through the
 * return value of yyparse().
 */
/*ARGSUSED*/
static void yyerror(sequence_t *seq, const char *s) {
  (void)seq;
  (void)s;
}
|