File: icalendar.l

package info (click to toggle)
nmh 1.8-4
  • links: PTS
  • area: main
  • in suites: forky, sid
  • size: 7,860 kB
  • sloc: ansic: 50,445; sh: 22,697; makefile: 1,138; lex: 740; perl: 509; yacc: 265
file content (368 lines) | stat: -rw-r--r-- 10,704 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
/* icalendar.l -- icalendar (RFC 5545) scanner
 *
 * This code is Copyright (c) 2014, by the authors of nmh.  See the
 * COPYRIGHT file in the root directory of the nmh distribution for
 * complete copyright information.
 */

/* See porting notes at end of this file. */

%{
#include "h/mh.h"
#include "sbr/charstring.h"
#include "h/icalendar.h"
#include "icalparse.h"
#include "sbr/base64.h"

static char *unfold (char *, size_t *);
static void destroy_icallex(void);
%}

/*
 * These flex options aren't used:
 *   8bit not needed
 *   case-insensitive not needed
 *   align not used because this isn't performance critical
 */
%option outfile="lex.yy.c" prefix="ical"
%option perf-report warn
%option never-interactive noinput noyywrap

              /*
               * From RFC 5545 § 3.1.
               */
name          {iana-token}|{x-name}
iana-token    ({ALPHA}|{DIGIT}|-)+
x-name        X-({vendorid}-)?({ALPHA}|{DIGIT}|-)+
vendorid      ({ALPHA}|{DIGIT}){3,}
param-name    {iana-token}|{x-name}
param-value   {paramtext}|{quoted-string}
paramtext     {SAFE-CHAR}*
value         {VALUE-CHAR}*
quoted-string {DQUOTE}{QSAFE-CHAR}*{DQUOTE}
              /* Of the printable ASCII range \x21-\x7E, QSAFE-CHAR leaves
                 out only DQUOTE (\x22), SAFE-CHAR additionally leaves out
                 "," (\x2C), ":" (\x3A) and ";" (\x3B), and VALUE-CHAR
                 leaves out nothing. */
QSAFE-CHAR    {WSP}|[\x21\x23-\x7E]|{NON-US-ASCII}
SAFE-CHAR     {WSP}|[\x21\x23-\x2B\x2D-\x39\x3C-\x7E]|{NON-US-ASCII}
VALUE-CHAR    {WSP}|[\x21-\x7E]|{NON-US-ASCII}
              /* The following is a short-cut definition that admits more
                 than the UNICODE characters permitted by RFC 5545. */
NON-US-ASCII  [\x80-\xF8]{2,4}
              /* The following excludes HTAB, unlike {CTL}. */
CONTROL       [\x00-\x08\x0A-\x1F\x7F]
EQUAL         =
              /* Solaris lex requires that the , be escaped. */
COMMA         \,
              /*
               * From RFC 5545 § 2.1.
               */
COLON         :
SEMICOLON     ;

              /*
               * From RFC 5545 § 3.3.11.
               *
               * Of the printable ASCII range \x21-\x7E, TSAFE-CHAR leaves
               * out DQUOTE (\x22), "," (\x2C), ":" (\x3A), ";" (\x3B) and
               * the backslash (\x5C).  {text} adds ":" and DQUOTE back and
               * admits the backslash only as part of an ESCAPED-CHAR.
               *
               * No rule in this file references {text}, so these three
               * definitions are only expanded if a rule starts using them.
               */
text          ({TSAFE-CHAR}|:|{DQUOTE}|{ESCAPED-CHAR})*
ESCAPED-CHAR  \\\\|\\;|\\,|\\N|\\n
TSAFE-CHAR    {WSP}|[\x21\x23-\x2B\x2D-\x39\x3C-\x5B\x5D-\x7E]|{NON-US-ASCII}

              /*
               * Core rules (definitions) from RFC 5234 Appendix B.1.
               *
               * BIT, CHAR, CTL, HEXDIG, LWSP, OCTET and VCHAR are not
               * referenced by any rule or other definition in this file.
               */
ALPHA         [\x41-\x5A\x61-\x7A]
BIT           [01]
CHAR          [\x01-\x7F]
CR            \x0D
              /* Variance from RFC 5234:  the {CR} is required in
                 CRLF, but it is optional below to support Unix
                 filesystem convention.  The trailing + also makes a
                 single {CRLF} match a whole run of consecutive line
                 breaks. */
CRLF          ({CR}?{LF})+
CTL           [\x00-\x1F\x7F]
DIGIT         [\x30-\x39]
DQUOTE        \x22
HEXDIG        {DIGIT}|[A-F]
HTAB          \x09
LF            \x0A
LWSP          ({WSP}|({CRLF}{WSP}))*
OCTET         [\x00-\xFF]
SP            \x20
VCHAR         [\x21-\x7E]
WSP           {SP}|{HTAB}

/*
 * Our definitions.
 *
 * A fold is one {CRLF} (one or more line breaks) followed by a single
 * space or tab.  Each folded-* pattern matches the corresponding plain
 * token with at least one fold somewhere in it; the rules that use them
 * call unfold() to remove the fold sequences from the matched text.
 */
fold                 {CRLF}{WSP}
folded-name          {name}({fold}+{iana-token})+
folded-param-name    {param-name}({fold}+{iana-token})+
folded-quoted-string {DQUOTE}{QSAFE-CHAR}*{fold}+{QSAFE-CHAR}*{DQUOTE}
folded-param-value   {paramtext}({fold}{paramtext}*)+|{folded-quoted-string}
folded-value         {VALUE-CHAR}*({fold}{VALUE-CHAR}*)+

/*
 * Start conditions.  Each one is entered by the rule that returns the
 * token it is named after (s_name after ICAL_NAME, s_colon after
 * ICAL_COLON, and so on); s_name is also re-entered after an
 * ICAL_PARAM_VALUE.  The scanner returns to INITIAL after ICAL_CRLF.
 */
%s s_name s_colon s_value s_semicolon s_param_name s_equal s_comma

%%

<INITIAL>
{CRLF} {
    /* Eat any leading newlines.  The action returns no token, so the
       scanner discards the match and carries on with the next input. */
}

<INITIAL>
{folded-name} {
    /* A name that is split across lines by one or more folds.  Remove
       the folds in place, then hand the parser a copy of the result. */
    /* flex 2.5.4 defines icalleng as an int instead of a size_t,
       so copy it. */
    size_t len = icalleng;
    unfold (icaltext, &len);
    icalleng = len;

    /* The token value is a strdup'd copy of the unfolded text. */
    icallval = strdup (icaltext);
    /* yy_push_state (s_name);         * s_name */
    BEGIN (s_name);                   /* s_name */
    return ICAL_NAME;
}

<INITIAL>
{name} {
    /* A name with no fold in it:  return a strdup'd copy of the matched
       text and switch to s_name, where a colon, semicolon or comma is
       recognized next. */
    icallval = strdup (icaltext);
    /* yy_push_state (s_name);         * s_name */
    BEGIN (s_name);                   /* s_name */
    return ICAL_NAME;
}

<s_name>
{COLON} {
    /* The colon that separates the name (and any parameters) from the
       value. */
    /* Don't need to strdup a single character.  icallval therefore
       points at the scanner's own text buffer rather than at a copy.
       NOTE(review): presumably the parser neither keeps nor frees this
       pointer -- confirm. */
    icallval = icaltext;
    /* yy_pop_state ();                * INITIAL */
    /* yy_push_state (s_colon);        * s_colon */
    BEGIN (s_colon);                  /* s_colon */
    return ICAL_COLON;
}

<s_colon>
{folded-value} {
    /* A value that contains one or more folds.  Remove the folds in
       place, then return a strdup'd copy of the unfolded text. */
    /* flex 2.5.4 defines icalleng as an int instead of a size_t,
       so copy it. */
    size_t len = icalleng;
    unfold (icaltext, &len);
    icalleng = len;

    icallval = strdup (icaltext);
    /* yy_pop_state ();                * INITIAL */
    /* yy_push_state (s_value);        * s_value */
    BEGIN (s_value);                  /* s_value */
    return ICAL_VALUE;
}

<s_colon>
{value} {
    /* A value with no fold in it:  return a strdup'd copy of the matched
       text and switch to s_value, where the terminating line break is
       recognized. */
    icallval = strdup (icaltext);
    /* yy_pop_state ();                * INITIAL */
    /* yy_push_state (s_value);        * s_value */
    BEGIN (s_value);                  /* s_value */
    return ICAL_VALUE;
}

<s_name>
{SEMICOLON} {
    /* The semicolon that introduces a parameter; a parameter name is
       recognized next, in s_semicolon. */
    /* Don't need to strdup a single character.  icallval points at the
       scanner's own text buffer rather than at a copy. */
    icallval = icaltext;
    /* yy_push_state (s_semicolon);    * s_name, s_semicolon */
    BEGIN (s_semicolon);              /* s_name, s_semicolon */
    return ICAL_SEMICOLON;
}

<s_semicolon>
{folded-param-name} {
    /* A parameter name that is split across lines by one or more folds.
       Remove the folds in place, then return a strdup'd copy of the
       unfolded text. */
    /* flex 2.5.4 defines icalleng as an int instead of a size_t,
       so copy it. */
    size_t len = icalleng;
    unfold (icaltext, &len);
    icalleng = len;

    icallval = strdup (icaltext);
    /* yy_pop_state ();                * s_name */
    /* yy_push_state (s_param_name);   * s_name, s_param_name */
    BEGIN (s_param_name);             /* s_name, s_param_name */
    return ICAL_PARAM_NAME;
}

<s_semicolon>
{param-name} {
    /* A parameter name with no fold in it:  return a strdup'd copy of
       the matched text and switch to s_param_name, where the equal sign
       is recognized. */
    icallval = strdup (icaltext);
    /* yy_pop_state ();                * s_name */
    /* yy_push_state (s_param_name);   * s_name, s_param_name */
    BEGIN (s_param_name);             /* s_name, s_param_name */
    return ICAL_PARAM_NAME;
}

<s_param_name>
{EQUAL} {
    /* The equal sign between a parameter name and its first value; a
       parameter value is recognized next, in s_equal. */
    /* Don't need to strdup a single character.  icallval points at the
       scanner's own text buffer rather than at a copy. */
    icallval = icaltext;
    /* yy_pop_state ();                * s_name */
    /* yy_push_state (s_equal);        * s_name, s_equal */
    BEGIN (s_equal);                  /* s_name, s_equal */
    return ICAL_EQUAL;
}

<s_equal,s_comma>
{folded-param-value} {
    /* A parameter value, after either an equal sign or a comma, that
       contains one or more folds.  Remove the folds in place, then
       return a strdup'd copy of the unfolded text. */
    /* flex 2.5.4 defines icalleng as an int instead of a size_t,
       so copy it. */
    size_t len = icalleng;
    unfold (icaltext, &len);
    icalleng = len;

    icallval = strdup (icaltext);
    /* Back to s_name, where a comma (another value), a semicolon
       (another parameter) or the colon can follow. */
    /* yy_pop_state ();                * s_name */
    BEGIN (s_name);                   /* s_name */
    return ICAL_PARAM_VALUE;
}

<s_equal,s_comma>
{param-value} {
    /* A parameter value, after either an equal sign or a comma, with no
       fold in it:  return a strdup'd copy of the matched text and go
       back to s_name, where a comma, semicolon or colon can follow. */
    icallval = strdup (icaltext);
    /* yy_pop_state ();                * s_name */
    BEGIN (s_name);                   /* s_name */
    return ICAL_PARAM_VALUE;
}

<s_name>
{COMMA} {
    /* The comma that separates values of a multi-valued parameter;
       another parameter value is recognized next, in s_comma. */
    /* Don't need to strdup a single character.  icallval points at the
       scanner's own text buffer rather than at a copy. */
    icallval = icaltext;
    /* yy_push_state (s_comma);        * s_name, s_comma */
    BEGIN (s_comma);                  /* s_name, s_comma */
    return ICAL_COMMA;
}

<s_value>
{CRLF} {
    /* The line break that ends a contentline.  It is only turned into
       an ICAL_CRLF token here, right after a value. */
    /* Use start condition to ensure that all newlines are where expected. */
    icallval = icaltext;
    /* yy_pop_state ();                * INITIAL */
    BEGIN (INITIAL);                  /* INITIAL */
    return ICAL_CRLF;
}

<s_colon>
{CRLF} {
    /* Null value:  the line break comes directly after the colon.
       Return an empty, strdup'd string as the value token. */
    icallval = strdup ("");
    /* yy_pop_state ();                * INITIAL */
    /* yy_push_state (s_value);        * s_value */
    BEGIN (s_value);                  /* s_value */
    /* Push the newline back so it can be handled in the proper state.
       A single '\n' is pushed back no matter how many line-break
       characters {CRLF} matched. */
    unput ('\n');
    return ICAL_VALUE;
}

. {
    /* By default, flex will just pass unmatched text.  Catch it instead:
       append each stray character to the "unexpected" charstring of the
       contentline at clines->last, creating that charstring on first use.
       If vevents.last has no contentlines, or clines->last is NULL, the
       character is silently dropped.
       NOTE(review): "." does not match a newline in flex, so a line break
       that no other rule accepts in the current start condition would
       still reach flex's default (echo) rule -- confirm that is intended.
       NOTE(review): vevents.last is dereferenced without a check; this
       assumes the caller sets it before scanning starts -- confirm. */
    contentline *clines = vevents.last->contentlines;
    contentline *cline;

    if (clines  &&  (cline = clines->last)) {
        if (cline->unexpected == NULL) {
            cline->unexpected = charstring_create (0);
        }
        charstring_append_cstring (cline->unexpected, icaltext);
    }
}

<INITIAL>
<<EOF>> {
    /* End of input between contentlines:  the normal way to finish.
       Clean up the scanner (where the flex version supports it) and
       stop scanning. */
    /* See next rule for when start state is not INITIAL. */
    destroy_icallex ();
    yyterminate ();
}

<<EOF>> {
    /* Missing a final newline after a token.  The input does not conform
       to RFC 5545 § 3.1, which requires that each contentline end with a
       CRLF.  (Assume that the token is at the end of a contentline.)  Be
       liberal in what we accept by faking a newline here, and setting the
       start state to terminate on the next call. */
    /* INITIAL has its own <<EOF>> rule, so this one applies to every
       other start condition.  The next call sees end of input again, this
       time in INITIAL, and that rule does the cleanup. */
    BEGIN (INITIAL);
    return ICAL_CRLF;
}

%%

/*
 * Remove, in place, every occurrence of two fold sequences that share
 * the same length, seqlen.  Each pass searches from the start of text
 * for seq_a and falls back to seq_b only when seq_a is not found; the
 * first hit is deleted and *leng is reduced by seqlen.  Returns once
 * neither sequence occurs in text.
 */
static void
squash_folds (char *text, size_t *leng, const char *seq_a,
              const char *seq_b, size_t seqlen)
{
    char *hit;

    for (;;) {
        hit = strstr (text, seq_a);
        if (hit == NULL) {
            hit = strstr (text, seq_b);
        }
        if (hit == NULL) {
            break;
        }

        /* Slide everything that follows the sequence down over it.  The
           count is what remains after the sequence, plus 1 so that the
           terminating null moves too. */
        (void) memmove (hit, hit + seqlen,
                        *leng - (hit - text) - seqlen + 1);
        *leng -= seqlen;
    }
}

/*
 * Unfold text in place:  delete every CR-LF-WSP sequence, then every
 * remaining LF-WSP sequence, where WSP is a space or a tab.
 *
 * text must be null-terminated and *leng must be its length; *leng is
 * updated to the length after unfolding.  Shortening the scanner's
 * text and length like this is legal because yymore() isn't used.
 *
 * Returns text.
 */
static char *
unfold (char *text, size_t *leng)
{
    squash_folds (text, leng, "\r\n ", "\r\n\t", 3);
    squash_folds (text, leng, "\n ", "\n\t", 2);

    return text;
}


/*
 * Release the scanner's memory by calling the cleanup function that
 * modern versions of flex generate.  Older versions don't provide it,
 * and a scanner built with something other than flex won't either;
 * in both cases this function does nothing.
 */
static void
destroy_icallex(void)
{
#if defined(FLEX_SCANNER)  &&  defined(YY_FLEX_SUBMINOR_VERSION)
    /* Hack:  there is no direct way to test for yylex_destroy(), so
       rely on the fact that the YY_FLEX_SUBMINOR_VERSION #define was
       added to flex (flex.skl v. 2.163) after yylex_destroy() was. */
    icallex_destroy ();
#endif /* FLEX_SCANNER  &&  YY_FLEX_SUBMINOR_VERSION */
}

/*
 * See comment in h/icalendar.h about having to provide these
 * because flex 2.5.4 doesn't.
 */

/*
 * Make file the stream that the scanner reads from, by assigning it
 * to the scanner's yyin.
 */
void
icalset_inputfile (FILE *file)
{
    yyin = file;
}

/*
 * Make file the stream that the scanner writes to, by assigning it
 * to the scanner's yyout.
 */
void
icalset_outputfile (FILE *file)
{
    yyout = file;
}

/*
 * Porting notes
 * -------------
 * POSIX lex only supports an entry point name of yylex().  nmh
 * programs can contain multiple scanners (see sbr/dtimep.l), so
 * nmh requires the use of flex to build them.
 * In addition, if there is a need to port this to Solaris lex:
 *  - Use the lex -e or -w option.
 *  - Comment out all of the %options.
 *  - Comment out the <<EOF>> rules.
 *  - The start condition and pattern must be on the same line.
 *  - Comments must be inside rules, not just before them.
 *  - Don't use a start condition stack.  In the code above, BEGINs are
 *    used instead, and the contents of an imaginary start condition
 *    stack are shown in the comment after each.  The stack operations
 *    are also shown in comments.
 */