File: cif_parser.y

package info (click to toggle)
cif-api 0.4.2-6
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 3,472 kB
  • sloc: ansic: 19,411; lex: 246; makefile: 201; sql: 164; yacc: 115; sh: 37
file content (264 lines) | stat: -rw-r--r-- 8,340 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
/*
 * Bison definitions for a CIF 2.0 parser skeleton
 *
 * This file is free and unencumbered software released into the public
 * domain.
 * 
 * Anyone is free to copy, modify, publish, use, compile, sell, or
 * distribute this software, either in source code form or as a compiled
 * binary, for any purpose, commercial or non-commercial, and by any
 * means.
 * 
 * In jurisdictions that recognize copyright laws, the author or authors
 * of this software dedicate any and all copyright interest in the
 * software to the public domain. We make this dedication for the benefit
 * of the public at large and to the detriment of our heirs and
 * successors. We intend this dedication to be an overt act of
 * relinquishment in perpetuity of all present and future rights to this
 * software under copyright law.
 * 
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */

/*
 * Bison options specify a reentrant (pure) parser accepting an input stream and a (dummy) scanner pointer as arguments
 * to its main function.  It will interface with a lexical analyzer that takes an additional "scanner" argument of type
 * void *, and it will produce verbose error messages and track location within the input.
 *
 * The "scanner" parse parameter is a bit of a hack -- its value on entry to the parse routine is ignored and can be
 * NULL -- it is the only means Bison provides to inject a local variable scoped to the whole parse function.  The
 * parser's initial action overwrites it with a freshly initialized scanner.
 *
 * The parse parameters are also handed to the error reporter, which is why yyerror() is declared in this file with
 * 'input_file' and 'scanner' parameters ahead of its message.
 *
 * NOTE(review): newer Bison releases are understood to prefer "%define api.pure" and "%define parse.error verbose"
 * over the %pure-parser and %error-verbose spellings used here -- confirm against the Bison version used by the
 * build before changing them.
 */
%pure-parser
/* extra parameters of the parse function; their order here is the order in which callers must supply them */
%parse-param { FILE *input_file }
%parse-param { void *scanner }
/* extra argument supplied to the lexical analyzer on each call */
%lex-param   { void *scanner }
%error-verbose
%locations

%{
#include <stdio.h>

/*
 * Lexical analyzer interface, listed in the order in which the parser uses it: create a scanner, attach the input
 * stream, fetch tokens, and finally release the scanner.  The scanner handle and the semantic value / location
 * objects are all passed as untyped pointers.
 */
int yylex_init(void **);
void yyset_in(FILE *in, void *scanner);
int yylex(void *lvalp, void *llocp, void *scanner);
int yylex_destroy(void *);

/* error reporter; its definition appears after the grammar rules in this file */
void yyerror(void *locp, FILE *input_file, void *scanner, const char *message);
%}

%initial-action {
    /*
     * Create the reentrant scanner for this parse, storing its handle in the 'scanner' parse parameter (whatever value
     * the caller passed is overwritten), then direct the scanner to read from the caller's input stream.  If the
     * scanner cannot be initialized then the parse function returns the initialization result code immediately,
     * without reading any input.
     */
    int init_status = yylex_init(&scanner);

    if (init_status != 0) {
        return init_status;
    }
    yyset_in(input_file, scanner);
}

/*
 * Terminal tokens in the generated parser's grammar, grouped by role.  The declaration order is significant only
 * inasmuch as it determines the token codes shared with the lexical analyzer, so it is kept stable.
 */

/* data block and save frame structure */
%token BLOCK_HEADER FRAME_HEADER FRAME_TERMINATOR

/* the loop_ keyword and data names (tags) */
%token LOOP DATANAME

/* opening and closing delimiters of list and table values, and the keys of table entries */
%token LIST_OPEN LIST_CLOSE TABLE_OPEN TABLE_CLOSE KEY

/* text fields and all other non-composite values */
%token TEXT_FIELD SIMPLE_VALUE

/* the version-identification comment that opens a CIF, and whitespace */
%token VCOMMENT WS

/*
 * The grammar's start symbol -- that is, the non-terminal symbol that represents the complete grammatical construct
 * the parser will attempt to recognize.  Input is accepted only if the whole of it can be reduced to this symbol.
 */
%start cif

%%

/*
 * An overall CIF (v2.0) starts with a version-identification comment, and may thereafter contain zero or more data
 * blocks delimited from the initial comment by whitespace.
 *
 * The scanner is created inside the parse function and its handle is never returned to the caller, so the actions
 * here are the only place where it can be released.  The first alternative releases it once the whole input has been
 * recognized.  The 'error' alternative exists solely so that the scanner is released when the input is rejected, too:
 * after a syntax error has been reported through yyerror(), the parser unwinds to its initial state, shifts the
 * special 'error' token, and reduces by that alternative, whose action destroys the scanner and then aborts the parse
 * so that the parse function still reports failure to its caller.  No recovery or resynchronization is attempted.
 */
cif: VCOMMENT delim_blocks { yylex_destroy(scanner); }
    | error                { yylex_destroy(scanner); YYABORT; }
  ;

/*
 * Everything that may follow the version comment: either nothing at all, or whitespace followed by zero or more data
 * blocks.  The leading whitespace is what delimits the first data block (if any) from the version comment, so data
 * blocks cannot appear without it.
 */
delim_blocks: /* empty */
    | whitespace
    | whitespace data_blocks
  ;

/*
 * One or more data blocks, expressed left-recursively.  Because data blocks always end with whitespace, there is no
 * need to provide for explicit whitespace separation or to make special allowance for trailing whitespace.
 */
data_blocks: data_block
    | data_blocks data_block
  ;

/*
 * A data block consists of a block header and zero or more subsequent save frames, items, and loops, all separated
 * from each other by whitespace, with trailing whitespace.  The first alternative is the bare header; each of the
 * others appends one more element, together with its trailing whitespace, to the block recognized so far.
 * Note: the requirement for trailing whitespace is artificial, though very commonly fulfilled in practice.  The
 * lexical analyzer is responsible for ensuring that it reports whitespace at the end of the input, whether there
 * actually is any or not, unless there are no data blocks.
 */
data_block: BLOCK_HEADER whitespace
    | data_block save_frame whitespace
    | data_block item whitespace
    | data_block loop                  /* loops already provide trailing whitespace */
  ;

/*
 * A save frame is constructed much the same as a data block, but may not contain nested save frames and has an
 * explicit terminator.  The frame body carries its own trailing whitespace, so the terminator follows it directly;
 * the whitespace after the terminator is supplied by the enclosing data block rule.
 */
save_frame:  frame_body FRAME_TERMINATOR
  ;

/*
 * A frame header followed by zero or more items and loops, each with trailing whitespace.
 */
frame_body: FRAME_HEADER whitespace
    /* | frame_body save_frame whitespace */  /* nested save frames were dropped from CIF 2 */
    | frame_body item whitespace
    | frame_body loop                  /* loops already provide trailing whitespace */
  ;

/*
 * Data items consist of a tag (data name) and a value, separated from each other by whitespace.  If the value is a
 * text field, then the initial newline of its opening delimiter can do double duty as the separator.  The separator
 * does not appear explicitly in this rule because delim_value already accounts for it.
 */
item:  DATANAME delim_value
  ;

/*
 * A loop consists of a header followed by a body with trailing whitespace.  Both the values and the trailing
 * whitespace come from delim_lb, which is why rules that contain a loop do not follow it with whitespace of their own.
 */
loop: loop_header delim_lb
  ;

/*
 * A loop header consists of a loop_ keyword followed by a sequence of one or more tags, all separated from each other
 * by whitespace.  The header itself does not end with whitespace; whatever separates the last tag from the first
 * value belongs to the loop body.
 */
loop_header: LOOP whitespace DATANAME
    | loop_header whitespace DATANAME
  ;

/*
 * A loop body consists of a sequence of data values, each separated from whatever precedes it by whitespace, with
 * trailing whitespace.  delim_lb is a loop body together with the whitespace that follows it.
 */
delim_lb: loop_body whitespace
  ;

/*
 * One or more values, not including any whitespace after the last of them.  The alternatives are, in order:
 *   - the first value, delimited from the loop header;
 *   - a text field directly following the preceding value, with no whitespace token in between (the initial newline
 *     of its opening delimiter serves as the separator);
 *   - a text field following whitespace;
 *   - a value other than a text field, which is accepted only following whitespace.
 */
loop_body: delim_value
    | loop_body TEXT_FIELD
    | delim_lb TEXT_FIELD
    | delim_lb non_delim_value
  ;

/*
 * A value of type LIST consists of a sequence of zero or more whitespace-separated values (of any type) delimited by
 * matching square brackets. Whitespace is optional between brackets and the values within, and between the brackets of
 * an empty list.
 */
list: LIST_OPEN list_values lclose
    | LIST_OPEN lclose
  ;

/*
 * The closing bracket of a list, optionally preceded by whitespace.
 */
lclose: LIST_CLOSE
    | whitespace LIST_CLOSE
  ;

/*
 * One or more list elements.  The first may follow the opening bracket either directly or after whitespace; each
 * subsequent one must be delimited from its predecessor.
 */
list_values: any_value
    | list_values delim_value
  ;

/*
 * A value of type TABLE consists of a sequence of zero or more whitespace-separated entries delimited by matching
 * curly braces. Whitespace is optional between braces and the entries within, and between the braces of an empty
 * table.  In the empty-table alternative, any whitespace between the braces is absorbed by topen.
 */
table: topen entries tclose
    |  topen TABLE_CLOSE
  ;

/*
 * The opening brace of a table, together with any whitespace that follows it.
 */
topen: TABLE_OPEN
    | TABLE_OPEN whitespace
  ;

/*
 * The closing brace of a table, optionally preceded by whitespace.
 */
tclose: TABLE_CLOSE
    | whitespace TABLE_CLOSE
  ;

/*
 * One or more table entries, separated from each other by whitespace.
 */
entries: entry
    | entries whitespace entry
  ;

/*
 * Table entries consist of a key and a value (of any type), optionally separated by whitespace.
 */
entry: KEY any_value
  ;

/*
 * A value in a context where a leading separator is permitted but not required: either a value carrying its own
 * leading delimiter or one without.
 */
any_value: delim_value
    | non_delim_value
;

/*
 * In most contexts, values need to be delimited from whatever precedes them by whitespace.  The initial newline of
 * a text field's leading delimiter can do double duty as separating whitespace, but additional whitespace is allowed
 * before a text field, too.  Hence a text field is the only kind of value accepted here without preceding whitespace.
 */
delim_value: whitespace non_delim_value
    | whitespace TEXT_FIELD
    | TEXT_FIELD
  ;

/*
 * Most values do not have inherent leading separators.  These include bare values, quoted string values, lists, and
 * tables.  Text fields are deliberately absent: they are handled wherever a delimited value is accepted.
 */
non_delim_value: SIMPLE_VALUE
    | list
    | table
  ;

/*
 * The lexical analyzer is permitted to split runs of whitespace into multiple tokens, so the grammar treats any
 * sequence of one or more consecutive whitespace tokens as a single unit of whitespace.
 */
whitespace: WS
    | whitespace WS
  ;

%%

/*
 * Error reporter invoked by the parser when it detects a grammar error.  It has external linkage, so other routines
 * -- the lexical analyzer, for instance -- may call it as well.
 *
 * lp          if not NULL, points to the location structure maintained by the parser's location-tracking code; the
 *             first line and first column recorded there are included in the report
 * input_file  unused; present only because the parse routine passes its own parameters along
 * scanner     unused, for the same reason
 * msg         the text of the error message
 *
 * The report is written to stderr as a single line.
 */
void yyerror(void *lp, FILE *input_file, void *scanner, const char *msg) {
    const YYLTYPE *where = (const YYLTYPE *) lp;

    (void) input_file;
    (void) scanner;

    if (where != NULL) {
        fprintf(stderr, "near line %d, column %d: %s\n", where->first_line, where->first_column, msg);
        return;
    }

    fprintf(stderr, "%s\n", msg);
}