1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350
|
#------------------------------------------------------------------------------
# Pegex Grammar for YAML 1.2
#
# This is a PEG (top-down) grammar for the YAML 1.2 language. It is in the
# Pegex format, and can be used to construct a YAML parser in any language
# to which Pegex has been ported (currently Perl, Ruby and JavaScript).
#
# Compared to the official YAML spec, this grammar should be much easier to
# read and understand. It will also be fully documented, and will attempt to
# have a test suite that exercises every rule path.
#
# The overall intent of this is to have one working grammar that backs up a
# full YAML framework implementation in every programming language where YAML
# is used. If this is achieved, then a bug in YAML can be fixed in one place,
# for every language's implementation.
#
# This grammar will go further than just parsing correct YAML. It will also
# parse for common YAML errors, and try to report the most useful error
# messages.
#------------------------------------------------------------------------------
# Notes:
# - Indentation will be done with indent / ondent / undent
# - Need to check some rules against spec for accuracy.
# - Make the grammar as strict as possible until justified.
# - Need to look for common errors in the grammar, and report them.
# - Need to have tests for known errors.
%grammar yaml
%version 0.0.1
#------------------------------------------------------------------------------
# High Level Constructs
#------------------------------------------------------------------------------
# A YAML Stream is the top level rule, and accounts for the entirety of the
# text being parsed. Basically, a stream is a set of zero or more documents.
# The documents are bracketed by `stream-start` (leading whitespace / comment
# lines) and `stream-end` (end of stream), both defined under "Other
# Constructs".
yaml-stream:
stream-start
yaml-document*
stream-end
# A YAML Document is a single node of any kind. It may start with an optional
# explicit head marker, and may be terminated with an optional explicit foot
# marker.
#
# Order of matching:
#   1. An optional %YAML directive, then zero or more %TAG directives.
#   2. Either an explicit head (`document-head`) followed by one of
#        - one or more blanks, a block scalar, then `+` whitespace, or
#        - `+` whitespace and any `yaml-node`;
#      or an implicit start (`document-start`) followed by a `yaml-node`.
#   3. `+` whitespace.
#   4. An explicit foot (`document-foot`) or the implicit `document-end`.
yaml-document:
directive-yaml?
directive-tag*
(
document-head (
/ BLANK+/ block-scalar + |
+ yaml-node
)
| document-start
yaml-node
)
+
(document-foot | document-end)
# Any kind of YAML node (except alias):
# Optional node properties (`yaml-props`), then `+` whitespace, then either a
# flow collection or a block node. The flow-collection alternative is guarded
# by a lookahead for `{` or `[`, so it is only attempted when one of those
# characters comes next.
yaml-node:
yaml-props? /+/ (
| /(=[ LCURLY LSQUARE])/ # Assertion optimization
flow-collection
| block-node
)
# Any kind of YAML node (including alias):
# The alias alternative is listed first; `yaml-node` is the fallback.
any-node:
| yaml-alias
| yaml-node
#------------------------------------------------------------------------------
# Block Constructs
#------------------------------------------------------------------------------
# This rule identifies all the block nodes. The alternatives are listed in
# this order: sequence, mapping (or scalar), and finally a plain block scalar.
block-node:
| block-sequence
| block-mapping-or-scalar
| block-scalar
# A block sequence is an indented set of nodes each starting with a
# dash+space:
# One or more entries, separated by `+` whitespace followed by
# `block-sequence-ondent`, bracketed by `block-sequence-indent` and
# `block-sequence-undent`.
# NOTE(review): `block-sequence-undent` is not defined in this grammar and is
# not listed with the other code-defined rules under "Indent Constructs" --
# confirm it is implemented in the Grammar class.
block-sequence:
block-sequence-indent
block-sequence-entry+ % (+ block-sequence-ondent)
block-sequence-undent
# A block sequence entry is a dash+space followed by any node (the node may
# be an alias, since `any-node` is used):
block-sequence-entry:
block-sequence-marker
any-node
# A block mapping is an indented set of key/value pairs separated by
# colon+space:
# One or more `block-pair`s, separated by `+` whitespace followed by
# `block-ondent`, bracketed by `block-indent` and `block-undent`.
# NOTE(review): despite the "-or-scalar" name, the body as written only
# matches pairs; the `block-key-or-scalar` line below is commented out.
block-mapping-or-scalar:
block-indent
# block-key-or-scalar
block-pair+ % (+ block-ondent)
block-undent
# A block mapping pair is a key/value separated by colon+space. The separator
# itself is consumed as the last part of `block-key`; the value is any node
# (including an alias).
block-pair:
block-key
any-node
# A block key scalar has more limitations than a block value scalar.
# A key is: optional node properties, a key scalar (quoted or plain, see
# `block-key-scalar`), then the `pair-separator`.
block-key:
yaml-props?
block-key-scalar
pair-separator
# Alias for different receiver method:
# Accepts the same alternatives as `block-scalar` except for the literal and
# folded forms.
block-key-scalar:
| double-quoted-scalar
| single-quoted-scalar
| block-plain-scalar
# A scalar in block form can take one of these 5 forms. `literal-scalar` and
# `folded-scalar` are not defined in this grammar text; per the notes under
# "Scalar Constructs" they are written in code in the Grammar class.
block-scalar:
| literal-scalar
| folded-scalar
| double-quoted-scalar
| single-quoted-scalar
| block-plain-scalar
#------------------------------------------------------------------------------
# Flow Constructs:
#------------------------------------------------------------------------------
# A flow node can be an alias or any one of these 3 kinds:
# For the non-alias case the node may carry optional properties
# (`yaml-props`), followed by `x` whitespace, before the sequence, mapping or
# scalar itself.
flow-node:
yaml-alias |
yaml-props? x (
| flow-sequence
| flow-mapping
| flow-scalar
)
# A flow collection is just flow map or seq. This is the rule `yaml-node`
# uses after its `{` / `[` lookahead.
flow-collection:
| flow-sequence
| flow-mapping
# A flow sequence is zero or more nodes, separated by commas, inside square
# brackets. A trailing comma is allowed (that is what the `%%` separator
# operator, as opposed to `%`, permits).
flow-sequence:
flow-sequence-start
flow-node* %% list-separator
flow-sequence-end
# A flow mapping is key / value pairs, separated by commas, inside curly
# braces. A trailing comma is allowed (that is what the `%%` separator
# operator, as opposed to `%`, permits).
flow-mapping:
flow-mapping-start
flow-mapping-pair* %% list-separator
flow-mapping-end
# A flow entry: either a `json-key` followed by a flow node, or the literal
# text 'xxx'.
# NOTE(review): no rule in this grammar text references `flow-entry`, and the
# 'xxx' alternative looks like a placeholder -- confirm whether this rule is
# still needed.
flow-entry:
| json-key flow-node
| 'xxx'
# A flow scalar only has 3 basic forms (no literal or folded form, and the
# plain form uses the stricter `flow-plain-scalar` rule):
flow-scalar:
| double-quoted-scalar
| single-quoted-scalar
| flow-plain-scalar
# A flow mapping can have any node as key or value, but they must also be in
# flow syntax.
# The key is either a `json-key` (which already includes its colon) or a
# flow node followed by `pair-separator`; the value is a flow node.
flow-mapping-pair:
( json-key | flow-node pair-separator )
flow-node
# A JSON-style key: a double quoted scalar immediately followed by a colon,
# with a lookahead requiring the next character to match Pegex's `NS` atom
# (i.e. the colon is not followed by whitespace).
json-key: / double-quoted-scalar COLON (= NS) /
# Starting and ending rules for flow collections:
# An opening bracket/brace is followed by `x` whitespace; a closing one is
# preceded by `x` whitespace and followed by `-` whitespace (see "Whitespace
# Rules" for the definitions of `x` and `-`).
flow-sequence-start: / '[' x/
flow-sequence-end: / x ']' -/
flow-mapping-start: / '{' x/
flow-mapping-end: / x '}' -/
#------------------------------------------------------------------------------
# Scalar Constructs
#------------------------------------------------------------------------------
# Literal scalar.
# literal-scalar: # This rule is written in code in the Grammar class.
# It needs access to the indent level.
# Folded scalar.
# folded-scalar: # This rule is written in code in the Grammar class.
# It needs access to the indent level.
# Double quoted scalar.
# Captures the text between the quotes. Inside, a backslash followed by a
# double quote is accepted as content; any other character except a double
# quote is accepted as-is.
double-quoted-scalar: / DOUBLE ((: BACK DOUBLE | [^ DOUBLE])*) DOUBLE /
# Single quoted scalar.
# Captures the text between the quotes. Inside, two consecutive single quotes
# are accepted as content; any other character except a single quote is
# accepted as-is.
single-quoted-scalar: / SINGLE ((: SINGLE SINGLE | [^ SINGLE])*) SINGLE /
# Plain (unquoted) scalars can't start with syntax chars, and can't contain
# colon+space.
# Structure of the regex:
#   - a negative lookahead rejecting any `char-non-start` first character,
#   - a lazy capture of the scalar text,
#   - `-` (trailing blanks and an optional comment),
#   - a lookahead that stops the scalar before colon+whitespace, an end of
#     line, or the end of the stream.
block-plain-scalar: /
(! char-non-start)
( ANY*? )
-
(= COLON WS | EOL | EOS)
/
# Plain (unquoted) scalars in flow context are more restrictive than in block
# context.
# Same shape as `block-plain-scalar`, but the terminating lookahead also
# stops before any `chars-syntax` character or a comma, and before
# colon+space.
# NOTE(review): the `COMMA SPACE` alternative appears to be subsumed by the
# character class, which already contains COMMA.
flow-plain-scalar: /
(! char-non-start)
( ANY*? )
-
(= [ chars-syntax COMMA ] | COLON SPACE | COMMA SPACE | EOL | EOS)
/
#------------------------------------------------------------------------------
# Indent Constructs:
#------------------------------------------------------------------------------
# block-indent: # This rule is written in code in the Grammar class.
# block-ondent: # This rule is written in code in the Grammar class.
# block-undent: # This rule is written in code in the Grammar class.
# block-sequence-indent: # This rule is written in code in the Grammar class.
# block-sequence-ondent: # This rule is written in code in the Grammar class.
#------------------------------------------------------------------------------
# Other Constructs:
#------------------------------------------------------------------------------
# Trigger stream events:
# `stream-start` consumes `+` whitespace (blank / comment lines, see `ws2`)
# at the top of the input; `stream-end` requires the end of the stream.
stream-start: /+/
stream-end: EOS
# The %YAML directive: the literal '%YAML', a required blank (asserted by
# `B`, consumed by `-`), the literal version '1.2', then `+` whitespace.
# Only version 1.2 is accepted by this rule.
directive-yaml: /
'%YAML' B - '1.2' +
/
# The %TAG directive: the literal '%TAG', a required blank, then two
# captures:
#   1. the text between two `!` characters,
#   2. one or more non-space characters (Pegex `NS`) after another required
#      blank,
# followed by `+` whitespace.
directive-tag: /
'%TAG' B -
BANG ( ANY*) BANG B -
( NS+ )
+
/
# A YAML header is 3 dashes. This rule matches only the dashes themselves;
# the blanks or newline that follow are handled by `yaml-document`.
document-head: '---'
# Implicit document start:
# A pure lookahead -- it asserts that a character (Pegex `ANY`) follows and
# consumes nothing.
document-start: / (= ANY) /
# A YAML footer is 3 dots followed by a newline or the end of the stream
# (see the `eol` rule):
document-foot: / '...' eol /
# Implicit document ending:
# Defined as the empty string; it is the fallback alternative after
# `document-foot` in `yaml-document`.
document-end: ''
# A node's properties are an anchor and / or tag in any order.
# Either an anchor with an optional tag, or a tag with an optional anchor.
# The whitespace that follows (`-` then `+`) is wrapped in a capture group.
yaml-props: /
(: yaml-anchor yaml-tag?
| yaml-tag yaml-anchor?
) ( - + )
/
# An explicit node tag:
# Captures the `!` together with the run of non-space characters (Pegex
# `NS`) that follows it, then consumes `-` whitespace.
yaml-tag: /(:('!' NS*) -)/
# A Node Anchor is a name for a node. Like '&this'.
# Captures the name (one or more `WORD` characters) without the `&`, then
# consumes `-` whitespace.
yaml-anchor: /(:'&' ( WORD+) -)/
# A Node Alias is a reference to an anchored node. Like '*this'.
# Captures the name (one or more `WORD` characters) without the `*`, then
# consumes `-` whitespace.
yaml-alias: /'*' ( WORD+) -/
# The marker that introduces a block sequence entry: a dash followed either
# by a blank (asserted by `B`, consumed by `-`) or by `+` whitespace.
# NOTE(review): `ws2` (the '+' rule) is defined with `*`, so the second
# alternative looks like it can match without consuming anything -- confirm
# that a bare dash directly followed by text is meant to be accepted here.
block-sequence-marker: /
DASH (: B - | + )
/
# Mapping key / value is always separated by ': ' (colon + space):
# `-` whitespace is allowed before the colon; a lookahead then requires
# whitespace (Pegex `WS`) after it, which the trailing `-` consumes.
pair-separator: /- ':' (= WS) -/
# List items separated by ',' (comma), with `x` whitespace allowed on both
# sides of the comma.
list-separator: / x ',' x/
# List of single chars that are YAML syntax (and thus must be avoided in
# various contexts). This rule is referenced inside character classes by
# `flow-plain-scalar` and `char-non-start`.
chars-syntax: /
AMP
STAR
BANG
LCURLY
RCURLY
LSQUARE
RSQUARE
PERCENT
DOUBLE
SINGLE
/
# YAML's Reserved Chars (the Pegex `GRAVE` and `AT` atoms). This rule is
# referenced inside the character class of `char-non-start`.
chars-reserved: /
GRAVE
AT
/
# Plain scalar can't start with any of these: a syntax char, a reserved char,
# or a hash. The plain scalar rules use this in a negative lookahead.
char-non-start: /[
chars-syntax
chars-reserved
HASH
]/
#------------------------------------------------------------------------------
# Whitespace Rules:
#------------------------------------------------------------------------------
# Definition of the '-' rule.
# Ignore whitespace and comment up to EOL:
# Zero or more blanks, then an optional comment. The two lookbehinds only
# allow the `#` to start a comment when it follows whitespace or `^`. The
# comment runs through `ANY*`; the line ending itself is not consumed here
# (see `ws2` / `eol`).
ws1: /
(:
BLANK*
(:
(: (?<= WS) | (?<= ^) )
HASH ANY*
)?
)
/
# Definition of the '+' rule.
# Ignore (possibly multiple) comment lines:
# Zero or more repetitions of `-` (blanks plus optional comment) followed by
# `eol`. Because of the `*`, this rule can also match nothing.
ws2: /(: - eol)*/
# Assert blank (space or tab):
# A lookahead only -- nothing is consumed, so callers follow `B` with `-` to
# actually eat the blank.
B: /(= BLANK)/
# 'x' is ws rule to eat blanks after '+' (ws2):
# i.e. `+` (whole blank / comment lines) followed by `-` (blanks and an
# optional comment on the current line).
x: /+ -/
# YAML might not have a final newline, so the end of the stream is accepted
# wherever an end of line is expected:
eol: / (: EOL | EOS ) /
# Make sure special 'ws' rule is never used:
# It is defined as the literal text 'XXX' rather than as a whitespace
# pattern; whitespace in this grammar goes through `ws1` ('-') and `ws2`
# ('+') instead.
ws: 'XXX'
# Vim Helpers, until we get `pegex.vim` mode.
# vim: set lisp sw=2:
|