File: elm.peg

package info (click to toggle)
codelite 17.0.0%2Bdfsg-6
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 136,384 kB
  • sloc: cpp: 491,550; ansic: 280,393; php: 10,259; sh: 8,930; lisp: 7,664; vhdl: 6,518; python: 6,020; lex: 4,920; yacc: 3,123; perl: 2,385; javascript: 1,715; cs: 1,193; xml: 1,110; makefile: 805; cobol: 741; sql: 709; ruby: 620; f90: 566; ada: 534; asm: 464; fortran: 350; objc: 289; tcl: 258; java: 157; erlang: 61; pascal: 51; ml: 49; awk: 44; haskell: 36
file content (671 lines) | stat: -rw-r--r-- 20,515 bytes parent folder | download | duplicates (4)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
# Copyright (c) 2022 Nik Silver
#
# This source code is released for free distribution under the terms of the
# GNU General Public License version 2 or later.
#
# Thanks to:
# - Mark Skipper, for the original Elm optlib parser, which inspired this;
# - Samuel Stauffer, for the Thrift PEG parser, which showed me how to
#   write a PEG parser;
# - Jan DolinĂ¡r, for the Kotlin PEG parser, which also provided insight;
# - Masatake YAMATO, for patience and guidance in code reviews.
#
# This parser generates tags for Elm. See https://elm-lang.org/docs/syntax
# for language reference.
#
# The parser will tag items reliably at the top level. Functions
# defined in let/in blocks are also tagged, but with limitations. See below.
#
# Kinds
# - m module
# - n namespace (ie a module that's renamed)
# - t type
# - c constructor (within a type)
# - a alias
# - p port
# - f function
#
# Key/value pairs
# - roles:def       This is defined here.
# - roles:imported  This is imported here.
# - type:<t>        This constructor is in the scope of type <t>, which
#                   may be dotted. Eg Main.myType.
# - function:<f>    This function is in the scope of function <f>, which
#                   may be dotted. Eg Main.myFunc.
# - module:<m>      This is in the scope of module <m>.
# - typeref:description:<t>   This function, constructor or port
#                   has type <t>.
# - moduleName:<m>  This namespace has original module name <m>.
#
# Functions defined in let/in blocks may be tagged, with these limitations:
# - the LHS (up to and including the '=') need to be on a single line;
# - the LHS can only have simple parameters;
# - their scope is only marked as being in the top-most function;
# - any type annotation is ignored.
# This should be good for 90% of inner functions. To make it totally robust
# is much more complicated due to (a) Elm's clever indentation-sensitivity
# and (b) limitations of the PEG parser used here.
#
# To do:
# Maybe do:
# - let/in blocks
#   - Allow tuples on the LHS. Eg '(val1, val2) = valFunc'.
#   - Inner functions' type annotations are used in the function's
#     type description.
#   - Inner functions can have more complex parameters.
# - Functions
#   - Allow non-Latin upper and lower case. Use
#     https://util.unicode.org/UnicodeJsps/properties.html
#     combined with \p{Lu}, \p{Ll} and \p{L}.
#
# Won't do:
# - Handle Elm's indentation properly.


%prefix "pelm"

%auxil	"struct parserCtx *"

%earlysource {
    #include "general.h"
}

%header {
	struct parserCtx;
}

%source {
#include "elm_pre.h"
#include "routines.h"

/*
 * Include these lines to debug the parsing.
 * From https://github.com/arithy/packcc#macros
 * This will output parsing info to STDERR.tmp in the vent of a failed test.
 */

/*
static const char *dbg_str[] = { "Evaluating rule", "Matched rule", "Abandoning rule" };

#define PCC_DEBUG(auxil, event, rule, level, pos, buffer, length) \
    fprintf(stderr, "%*s%s %s @%zu [%.*s]\n", \
        (int)((level) * 2), "", dbg_str[event], rule, pos, (int)(length), buffer)
 */
}

# Top level elements -----------------------------------------------------

# We separate the file into the module section and the main section
# so that we only consider and tag one module declaration

file <-
    {
        ELM_INIT_MODULE_SCOPE;
    }
    TLSS?
    moduleDeclaration?
    TLSS?
    mainTopLevelStatements?
    TLSS?
    EOF

mainTopLevelStatements <-
    topLevelStatement (TLSS topLevelStatement)*

topLevelStatement <-
    importStatement
    / typeAlias
    / customType
    / portDeclaration
    / functionWithTypeAnnotation
    / functionDefinition
    / ignoreRestOfStatement

# Main Elm grammar -------------------------------------------------------

# Module declaration
#
# We can be a bit relaxed about distinguishing functions, types and
# constructors listed in a module declaration, because we're not going
# to tag them.

moduleDeclaration <-
    ('port' _1_)? 'module' _1_ <dottedIdentifier> _1_ 'exposing' _0_ '(' exposedList ')' EOS {
        elm_module_scope_index = makeElmTagSettingScope(auxil, $1, $1s, K_MODULE, ROLE_DEFINITION_INDEX);
    }

exposedList <- _0_ exposedItem _0_ (',' _0_ exposedList )*

exposedItem <-
    exposedFieldOrType
    / exposedFunction
    / exposedItemIgnored

exposedFieldOrType <-
    <upperStartIdentifier> (_0_ '(' _0_ exposedTypeConstructorList _0_ ')')?

exposedFunction <-
    lowerStartIdentifier

exposedItemIgnored <- '.'+

exposedTypeConstructorList <-
    (upperStartIdentifier / exposedItemIgnored) _0_ (',' _0_ exposedTypeConstructorList)*

# Type alias
#
# We don't care what the actual alias is

typeAlias <-
    'type' _1_ 'alias' _1_ <upperStartIdentifier> _0_ '=' _0_ ignoreRestOfStatement {
        makeElmTag(auxil, $1, $1s, K_ALIAS, ROLE_DEFINITION_INDEX);
    }

# Custom type
#
# Includes type parameters, such as 'x' in 'type MyType x = Wrap x'.
#
# In a definition such as 'type MyType = Cons1 String Int' we
# capture 'MyType', and then for each type in each constructor
# subtype (here, 'String' and 'Int') we append a '->' and finally
# concatentate them all to get the constructor's type description,
# such as 'String -> Int -> MyType'

customType <-
    'type' _1_ <upperStartIdentifier> (_0_ typeParameterList)? _0_ '=' _0_ {
        initElmConstructorFields(auxil, $1);
        makeElmTagSettingScope(auxil, $1, $1s, K_TYPE, ROLE_DEFINITION_INDEX);
    } constructorList EOS {
        POP_SCOPE(auxil);
        tidyElmConstructorFields(auxil);
    }

typeParameterList <- lowerStartIdentifier (_1_ lowerStartIdentifier)*

# A type could be defined as a constructor list:
#     type A = Cons1 String | Cons2 Float Float | ...
# The 'String' and the 'Float Float' etc are the constructor subtypes.
# Each 'String', 'Float', etc is a single type spec.
# But a single type spec could also be a record, a tuple or a function spec.
#
# Subtypes in constructors need to be parsed differently from types in
# type annotations and record fields. Consider these:
#     type A1Type a b = A1Cons a b              -- Line 1
#     type A2Type a b = A2Cons String a b       -- Line 2
#     type BType a b = BCons { x : A2Type a b}  -- Line 3
#     cFunc : A1Type String Int -> String       -- Line 4
# In line 1, 'a b' must be parsed as two individual types (parameterised).
# In line 2, 'String a b' must be parsed as three individual types.
# In line 3, 'A2Type a b' must be parsed as one type, even though it's
# lexically equivalent to 'String a b' on line 2.
# In line 4, 'A1Type String Int' must also be parsed one type.
# This means we have to have slightly different rules for parsing a
# constructor's subtypes as from other cases. The first case is handled
# by constructorSubtypeList and singleConstructorSubtypeSpec. The second
# case is handled by singleTypeSpec.

constructorList <- <upperStartIdentifier> {
        initElmConstructorSubtypeFields(auxil);
    } _0_ <constructorSubtypeList>? {
        int r = makeElmTag(auxil, $1, $1s, K_CONSTRUCTOR, ROLE_DEFINITION_INDEX);
        addElmConstructorTypeRef(auxil, r);
    } _0_ ('|' _0_ constructorList)?

constructorSubtypeList <- singleConstructorSubtypeSpec (_0_ singleConstructorSubtypeSpec)*

singleConstructorSubtypeSpec <-
    < recordTypeSpec
      / tupleTypeSpec
      / functionTypeSpec
      / dottedIdentifier
    >
    {
        addElmConstructorSubtype(auxil, $1);
    }

singleTypeSpec <-
    recordTypeSpec
    / tupleTypeSpec
    / functionTypeSpec
    / parameterisedTypeSpec

recordTypeSpec <-
    '{' (_0_ recordRestrictionPrefix)? _0_ fieldSpec (_0_ ',' _0_ fieldSpec)* _0_ '}'
    / '{' (_0_ recordRestrictionPrefix)? _0_ '}'

recordRestrictionPrefix <-
    lowerStartIdentifier _0_ '|'

fieldSpec <-
    lowerStartIdentifier _0_ ':' _0_ singleTypeSpec

tupleTypeSpec <-
    '(' _0_ singleTypeSpec (_0_ ',' _0_ singleTypeSpec)* _0_ ')'
    / '(' _0_ ')'

parameterisedTypeSpec <-
    dottedIdentifier (_1_ (singleTypeSpec / lowerStartIdentifier))*

functionTypeSpec <-
    singleTypeSpec (_0_ '->' _0_ singleTypeSpec)+

# Port declaration

portDeclaration <-
    'port' _1_ <lowerStartIdentifier> _0_ ':' _0_ <typeAnnotation> EOS {
        int r = makeElmTag(auxil, $1, $1s, K_PORT, ROLE_DEFINITION_INDEX);
        addElmTypeRef(r, $2);
    }

# Import statement
#
# For the import statement we don't want the imported items to appear in the
# scope of the current module (ie this file), otherwise they'll be named
# wrongly. So we # want to save the module scope, make the imported tags,
# then restore the module scope. We do this in two separate C code blocks,
# because the module scope needs to be saved before any of the imported tags
# are made.
#
# Also, if we create a namespace then that *does* live in the scope of the
# current module, so we'll make that tag (if needed) before saving the
# module scope.

importStatement <-
    'import' _1_ <dottedIdentifier> (_1_ 'as' _1_ <upperStartIdentifier>)? {
        // Make the namespace tag first, as it's in the file module's scope
        if ($2s > 0) {
            int r = makeElmTag(auxil, $2, $2s, K_NAMESPACE, ROLE_DEFINITION_INDEX);
            attachParserFieldToCorkEntry (r, ElmFields[F_MODULENAME].ftype, $1);
        }

        // Now make the tag for the imported module, as it lives outside
        // the scope of the file module
        ELM_SAVE_MODULE_SCOPE;
        makeElmTagSettingScope(auxil, $1, $1s, K_MODULE, ELM_MODULE_IMPORTED);
    } (_1_ 'exposing' _0_ '(' _0_ importedList _0_ ')')? EOS {
        ELM_RESTORE_MODULE_SCOPE;
    }

importedList <- importedItem _0_ (',' _0_ importedList)*

importedItem <-
    importedFunction
    / importedType
    / importedItemIgnored

importedFunction <- <lowerStartIdentifier> {
        makeElmTag(auxil, $1, $1s, K_FUNCTION, ELM_FUNCTION_EXPOSED);
    }

# When importing a type and constructors we want the constructors
# to be in the scope of the type. So we have to set the scope as the
# type first, before parsing (and making the tags for) the constructors.
# That's why the code here uses two separate C code blocks.

importedType <-
    <upperStartIdentifier> {
        makeElmTagSettingScope(auxil, $1, $1s, K_TYPE, ELM_TYPE_EXPOSED);
    } (_0_ '(' _0_ importedTypeConstructorList _0_ ')')? {
        // We're done with the type and its constructors, so we can pop it
        POP_SCOPE(auxil);
    }

importedItemIgnored <- '.'+

importedTypeConstructorList <-
    (importedTypeConstructor / importedItemIgnored) _0_ (',' _0_ importedTypeConstructorList)*

importedTypeConstructor <-
    <upperStartIdentifier> {
        makeElmTag(auxil, $1, $1s, K_CONSTRUCTOR, ELM_CONSTRUCTOR_EXPOSED);
    }

# Function with a type annotation.
#
# The type is on one line, and the function must follow immediately as
# the next top level statement

functionWithTypeAnnotation <-
    <lowerStartIdentifier> _0_ ':' _0_ <typeAnnotation> TLSS
    <$1> _1_ <functionParameterList>? {
        int r = makeElmTagSettingScope(auxil, $3, $3s, K_FUNCTION, ROLE_DEFINITION_INDEX);
        addElmTypeRef(r, $2);
        addElmSignature(r, $4);
    } _0_ '=' _0_ expression EOS {
        POP_SCOPE(auxil);
    }

typeAnnotation <-
    singleTypeSpec (_0_ '->' _0_ singleTypeSpec)*

# Function without a type annotation

functionDefinition <-
    <nonKeywordIdentifier> _0_ <functionParameterList>? {
        int r = makeElmTagSettingScope(auxil, $1, $1s, K_FUNCTION, ROLE_DEFINITION_INDEX);
        addElmSignature(r, $2);
    } _0_ '=' _0_ expression EOS {
        POP_SCOPE(auxil);
    }

# A function parameter list is what we define a function with. It's the
# x y z in 'fn x y z'. But of course they can be more complex, such as
# 'fn (Cons a b) ({ thing } as otherThing))' etc.

functionParameterList <- functionParameter (_0_ functionParameter)*

functionParameter <-
    plainFunctionParameter
    / tupleFunctionParameter
    / recordFunctionParameter
    / constructorFunctionParameter

plainFunctionParameter <-
    lowerStartIdentifier (_0_ asClause)?

tupleFunctionParameter <-
    '(' _0_ functionParameter (_0_ ',' _0_ functionParameter)* _0_ ')' (_0_ asClause)?

recordFunctionParameter <-
    '{' _0_ lowerStartIdentifier (_0_ ',' _0_ lowerStartIdentifier)* _0_ '}' (_0_ asClause)?

constructorFunctionParameter <-
    upperStartIdentifier (_0_ functionParameter)* (_0_ asClause)?

asClause <-
    'as' _1_ lowerStartIdentifier

# Expressions

expression <-
    (letInBlock _NL_IND_)? simpleExpression (_0_ binaryOperator _0_ expression)*

simpleExpression <-
    hexNumber
    / decimal
    / multilineString
    / characterLiteral
    / oneLineString
    / tupleExpression
    / listExpression
    / recordExpression
    / caseStatement
    / ifThenElseStatement
    / anonymousFunction
    / functionCall

tupleExpression <-
    '(' _0_ expression (_0_ ',' _0_ expression)* _0_ ')'
    / '(' _0_ ')'

listExpression <-
    '[' _0_ expression (_0_ ',' _0_ expression)* _0_ ']'
    / '[' _0_ ']'

recordExpression <-
    '{' _0_
    (lowerStartIdentifier _0_ '|' _0_)?
    recordExpressionAssignment (_0_ ',' _0_ recordExpressionAssignment)* _0_
    '}'
    / '{' _0_ '}'

recordExpressionAssignment <-
    lowerStartIdentifier _0_ '=' _0_ expression

anonymousFunction <-
    '\\' _0_ functionParameterList _0_ '->' _0_ expression

functionCall <-
    ( dottedIdentifier
      / '.' lowerStartIdentifier
      / '(' binaryOperator ')'
    ) (_1_ expression)*

# Let/in block
#
# We'll treat let/in blocks very simply - we'll consider each line
# and expect the whole line either to be the start of a function
# definition (perhaps with some of its body) or its body. So something
# like 'f x y =' will have to be on one line.

letInBlock <-
    'let' _NL_IND_
    letInLine (_NL_IND_ letInLine)* _NL_IND_
    'in'

letInLine <-
    letInFunctionDefinition
    / letInBlock
    / letInFunctionBody

letInFunctionDefinition <-
    <nonKeywordIdentifier> WS* <letInFunctionParameters>? WS* '=' Non_NL* {
        int r = makeElmTag(auxil, $1, $1s, K_FUNCTION, ROLE_DEFINITION_INDEX);
        addElmSignature(r, $2);
    }

letInFunctionParameters <-
    nonKeywordIdentifier (WS+ nonKeywordIdentifier)*

letInFunctionBody <-
    !('let' / 'in') Non_NL+

# Case statements
#
# We're going to be pretty loose with case statements, otherwise we'd
# have to follow Elm's indentation rules. So we'll just say
# the body of a case statement is a series of patterns like this:
# <something> -> <expression>. The <expression> might well swallow
# up a bit of the next case pattern (because to do otherwise requires
# following Elm's indentation rules), so that's why we just specify
# <something>.

caseStatement <-
    'case' _1_ expression _0_ 'of' _1_
    caseClauseList

caseClauseList <-
    caseClause (_1_ caseClause)*

caseClause <-
    roughCasePatternChar* '->' _0_ expression

roughCasePatternChar <-
    !('->' / TLSS / lineComment / delimitedComment / NL) .

# If/then/else statements

ifThenElseStatement <-
    'if' _1_ expression _1_
    'then' _1_ expression _1_
    'else' _1_ expression

# Binary operators

binaryOperator <-
    '>>' / '<<' / '|>' / '<|'
    / '//' / '++' / '::'
    / '==' / '/='
    / '&&' / '||'
    / '<=' / '>='
    / '<' / '>'
    / '+' / '-' / '*' / '/' / '^'

# Sometimes we just need to ignore the rest of the (top level) statement

ignoreRestOfStatement <-
    (multilineString / Non_WS_or_NL+) (_1_ ignoreRestOfStatement)*

multilineString <-
    '"""' (!'"""' .)* '"""'

# Low level tokens -------------------------------------------------------

# Identifiers

naiveIdentifier <- [A-Za-z_] alphanumeric*

upperStartIdentifier <- [A-Z] alphanumeric*

lowerStartIdentifier <- !keyword [a-z_] alphanumeric*

alphanumeric <- [A-Za-z0-9_]

nonKeywordIdentifier <-
    !keyword naiveIdentifier

keyword <-
    'type' !alphanumeric
    / 'module' !alphanumeric
    / 'port' !alphanumeric
    / 'alias' !alphanumeric
    / 'as' !alphanumeric
    / 'exposing' !alphanumeric
    / 'import' !alphanumeric
    / 'let' !alphanumeric
    / 'in' !alphanumeric
    / 'case' !alphanumeric
    / 'of' !alphanumeric
    / 'if' !alphanumeric
    / 'then' !alphanumeric
    / 'else' !alphanumeric

dottedIdentifier <- nonKeywordIdentifier ('.' nonKeywordIdentifier)*

# Numbers

decimal <-
    exponentialDecimal
    / simpleDecimal

exponentialDecimal <-
    simpleDecimal 'e' simpleInteger

simpleDecimal <-
    simpleInteger ('.' digits)?
    / '.' digits+

simpleInteger <- [-+]? digits

digits <- [0-9]+

hexNumber <- '0x' [0-9A-Fa-f]+

# One line strings and characters

oneLineString <- '"' inStringChar* '"'

characterLiteral <- "'" inStringChar "'"

inStringChar <-
    !('"' / NL)
    ( inStringUnicodeChar / inStringEscapedChar / inStringPlainChar )

inStringPlainChar <-
    !('"' / '\\' / NL) .

inStringEscapedChar <-
    '\\' !('u' / NL) .

inStringUnicodeChar <-
    '\\u{' [0-9A-Fa-f]+ '}'

# Ignorable things -------------------------------------------------------

# Simple things...

WS <- [ \t]+
NL <- '\n' / '\f' / '\r' '\n'?
Non_NL <- [^\n\r\f]
Non_WS_or_NL <- [^ \t\n\r\f]
EOF <- !.

# A delimited comment is effectively "nothing", even if it spans several
# lines. But it does separate two tokens.
#
# A line comment can only come at the end of a line. Notice here it doesn't
# include the actual newline.

delimitedComment <- '{-' (delimitedComment / !'-}' .)* '-}'

lineComment <- '--' Non_NL*

# Elm whitespacing is a bit special...
# - Two statements are at the same level (eg at the top level, or statements
#   in the same let...in block) only if they begin with the same indentation.
# - One line has more indentation than the previous line then it is a
#   continuation of that previous line.
# - But sometimes several statements can appear on the same line if tokens
#   make it obvious. Eg this is okay:
#   Eg: 'myFunc = let f x y = x + y in f 3 4'
#
# We'll only worry about top level statements for this part. But we still
# need to know
# - when a top level statement begins; and
# - when two sequential tokens are part of the same top level statement.
#   They may be separated by a combination of whitespace, comments, and
#   newlines, but if there is a newline then that will always be followed
#   by an indent.
#
# When considering how one token relates to the next in top level statements
# we should only need three kinds of "join"s:
# - Where we need whitespace, such as 'import MyModule', but that space
#   may occur over multiple lines. If it's over multiple lines, the
#   second token needs to be somewhat in from the first column of text.
#   We'll call this _1_ - ie at least one space.
# - Where we don't need whitespace, such as 'f = 3', but that space
#   may occur over multiple lines. If it's over multiple lines then again
#   the second token needs to be somewhat in from the first column of text.
#   We'll call this _0_ - ie possibly zero space.
# - When we've got an end of statement, and the next token is some
#   meaningful code (not a comment) and starts in the first column of text.
#   Then that next token is the start of the next top level statement.
#   We'll call this TLSS, for top level statement separator.
#
# We can define _1_ as
# - The longest possible sequence of whitespace, delimited comments,
#   newlines, and line comments, as long as it ends with a whitespace
#   or a delimited comment, because then it won't be in the first column.
#
# We can define _0_ as
# - _1_ or the empty string.
#
# We can define TLSS as
# - The longest possible sequence of whitespace, delimited comments,
#   newlines, and line comments, as long as it ends with a newline or EOF
#   (and there's no more ignorable characters after that).
#
# PEG parsing tip: If we want to define a sequence like 'the longest
# sequence of As, Bs and Cs, as long as it ends with C' we define a short
# sequence like 'the longest sequence of As and Bs, then a C' and then
# define 'the longest sequence of those'.

_1_short <-
    (lineComment / NL)* (WS / delimitedComment)

_1_ <- _1_short+


_0_ <- _1_ / ''

TLSS_short <-
    (WS / lineComment / delimitedComment)* (NL / EOF)

TLSS <-
    TLSS_short+
    !(WS / lineComment / delimitedComment)

# An end of statement marks the end of a top level statement, but
# doesn't consume anything

EOS <- &( TLSS / EOF )

# When considering lines in a let/in block we'll want to look for
# a newline and an indent. There may be some delimited comments etc
# in between.

_NL_IND_ <-
    TLSS_short+ WS+

%%
#include "elm_post.h"