1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141
|
# Grammar-level directives. Lines of the form %% name = value are recorded
# by the set_variable action of the statement rule; %% name followed by a
# brace block is recorded by its add_directive action.
# NOTE(review): name is presumably the class of the generated parser -- confirm.
%% name = KPeg::FormatParser
# NOTE(review): presumably suppresses the generated constructor, since this
# file supplies its own initialize in the setup block -- confirm.
%% custom_initialize = true
# Code attached to the pre-class directive. It requires kpeg/grammar;
# presumably that file defines KPeg::Grammar, which initialize instantiates.
%% pre-class {
require 'kpeg/grammar'
}
%% {
##
# Creates a new kpeg format parser for +str+.
#
# +str+ and +debug+ are handed unchanged to setup_parser, which is not
# defined in this file. A fresh KPeg::Grammar is then stored in @g; the
# rule actions of this grammar build their results through that object.
def initialize(str, debug=false)
setup_parser(str, debug)
@g = KPeg::Grammar.new
end
##
# The parsed grammar. Also reachable through the +grammar+ alias.
attr_reader :g
alias_method :grammar, :g
}
# Line terminator: a single newline character.
eol = "\n"
# A # comment that runs to the end of input rather than to a newline.
# root uses it to accept an unterminated comment on the last line.
eof_comment = "#" (!eof .)*
# A # comment: everything up to and including the next newline.
comment = "#" (!eol .)* eol
# One whitespace character: space, tab or newline.
space = " " | "\t" | eol
# Optional filler: any run (possibly empty) of whitespace and comments.
- = (space | comment)*
# The literal star. range_elem accepts it as a bound, and mult_range turns
# it into nil.
kleene = "*"
# Allow - by itself, but not at the beginning
# Yields the matched text: either a lone dash, or a letter followed by any
# number of word characters and dashes (matched case-insensitively).
var = < "-" | /[a-z][\w-]*/i > { text }
# A method name as text: a letter or underscore followed by word characters.
method = < /[a-z_]\w*/i > { text }
# The part of an escape sequence that follows the backslash inside a
# double-quoted string (the backslash itself is matched by dbl_not_quote).
# Each letter escape yields the character written in its action; note that
# s yields a plain space. A backslash or a double quote yields itself.
# Octal and hex forms are delegated to num_escapes. Any other character is
# returned unchanged by the final catch-all alternative, so order matters:
# the catch-all must stay last.
dbl_escapes = "n" { "\n" }
| "s" { " " }
| "r" { "\r" }
| "t" { "\t" }
| "v" { "\v" }
| "f" { "\f" }
| "b" { "\b" }
| "a" { "\a" }
| "e" { "\e" }
| "\\" { "\\" }
| "\"" { "\"" }
| num_escapes
| < . > { text }
# Numeric escapes: one to three octal digits, or x followed by exactly two
# hex digits (case-insensitive). The digits are converted with to_i in base
# 8 or 16 and the resulting number is packed with the U directive, which
# produces the UTF-8 encoding of that code point.
num_escapes = < /[0-7]{1,3}/ > { [text.to_i(8)].pack("U") }
| "x" < /[a-f\d]{2}/i > { [text.to_i(16)].pack("U") }
# TODO use /\h{2}/ after 1.8 support is dropped
# A non-empty run of characters containing neither a backslash nor a
# double quote, returned as text.
dbl_seq = < /[^\\"]+/ > { text }
# Body of a double-quoted string: any number of backslash escapes (resolved
# by dbl_escapes) and literal runs. The captured value is passed through
# Array() before being returned.
dbl_not_quote = ("\\" dbl_escapes | dbl_seq)*:ary { Array(ary) }
# A double-quoted string literal. The pieces of the body are joined and
# handed to @g.str.
dbl_string = "\"" dbl_not_quote:s "\"" { @g.str(s.join) }
# The only escape handled inside single quotes: a backslash followed by a
# single quote, which yields the quote alone.
sgl_escape_quote = "\\'" { "'" }
# Exactly one character that is not a single quote. Because it consumes one
# character at a time, sgl_escape_quote is tried again at every position
# of the string body.
sgl_seq = < /[^']/ > { text }
# Body of a single-quoted string: escaped quotes and single characters.
# The captured value is passed through Array() before being returned.
sgl_not_quote = (sgl_escape_quote | sgl_seq)*:segs { Array(segs) }
# A single-quoted string literal. The pieces of the body are joined and
# handed to @g.str.
sgl_string = "'" sgl_not_quote:s "'" { @g.str(s.join) }
# Any string literal: the double-quoted form is tried first, then the
# single-quoted form.
string = dbl_string
| sgl_string
# Regexp body as raw text: one or more items, each either an escaped slash
# (backslash followed by slash) or a single character other than a slash.
not_slash = < ("\\/" | /[^\/]/)+ > { text }
# Option letters after the closing slash: zero or more lowercase letters,
# returned as text (possibly empty).
regexp_opts = < [a-z]* > { text }
# A regexp literal of the form /body/opts. Body and option text are passed
# to @g.reg.
regexp = "/" not_slash:body "/" regexp_opts:opts
{ @g.reg body, opts }
# A single letter (either case) or digit, returned as text.
char = < /[a-z\d]/i > { text }
# A bracketed character range: two char endpoints separated by a dash,
# passed to @g.range. No whitespace is accepted inside the brackets.
char_range = "[" char:l "-" char:r "]" { @g.range(l,r) }
# A decimal integer that does not start with zero, returned as text.
range_num = < /[1-9]\d*/ > { text }
# One bound of a repetition range: a number or a star, returned as text.
range_elem = < range_num|kleene > { text }
# Repetition bounds, returned as a two-element array:
#   [l, r] -> each bound converted with to_i, or nil where the bound is a star
#   [e]    -> [e, e], the same number for both bounds
# Filler (whitespace and comments) is allowed around every element.
mult_range = "[" - range_elem:l - "," - range_elem:r - "]"
{ [l == "*" ? nil : l.to_i, r == "*" ? nil : r.to_i] }
| "[" - range_num:e - "]" { [e.to_i, e.to_i] }
# Alias for curly; this is the name the value rule refers to.
curly_block = curly
# A brace-delimited block. The text between the outer braces is captured
# verbatim and wrapped with @g.action. Inside the block, braces must
# balance (handled by the recursive curly alternative) and quoted strings
# are consumed as whole units by string, so a brace inside a string does
# not count. Quote characters outside of a string are not matched by the
# plain-text alternative.
curly = "{" < (spaces | /[^{}"']+/ | string | curly)* > "}" { @g.action(text) }
# Balanced parentheses, with quoted strings consumed as whole units. The
# rule has no action of its own; callers wrap it in a capture to get its text.
nested_paren = "(" (/[^()"']+/ | string | nested_paren)* ")"
# A single grammar term. Alternatives are tried in the order written.
#
# Left-recursive postfix forms, applied to an already parsed value v:
#   v:name       -> @g.t(v, name)
#   v?           -> @g.maybe(v)
#   v+           -> @g.many(v)
#   v*           -> @g.kleene(v)
#   v mult_range -> @g.multiple(v, low, high)
# Prefix forms:
#   &v -> @g.andp(v)        !v -> @g.notp(v)
# Grouping and capture (filler allowed inside the delimiters):
#   ( expr )  -> the expression itself
#   @< expr > -> @g.bounds    < expr > -> @g.collect
# Actions:
#   curly_block, or ~method with optional parenthesised arguments, which
#   becomes an action whose text is the method name plus the raw argument text
# Other terms:
#   .             -> @g.dot
#   @name(args)   -> @g.invoke; the argument text is nil when absent
#   ^name(args)   -> @g.foreign_invoke on the grammar named parent
#   %gram.name(args) -> @g.foreign_invoke on gram
#   name(args)    -> @g.ref; the argument text is nil when absent
#   char_range, regexp, string literals
#
# The !(- "=") lookahead on the @name and bare name forms rejects a name
# that is followed by an equals sign, which is how statement begins a rule
# definition.
value = value:v ":" var:n { @g.t(v,n) }
| value:v "?" { @g.maybe(v) }
| value:v "+" { @g.many(v) }
| value:v "*" { @g.kleene(v) }
| value:v mult_range:r { @g.multiple(v, *r) }
| "&" value:v { @g.andp(v) }
| "!" value:v { @g.notp(v) }
| "(" - expression:o - ")" { o }
| "@<" - expression:o - ">" { @g.bounds(o) }
| "<" - expression:o - ">" { @g.collect(o) }
| curly_block
| "~" method:m < nested_paren? >
{ @g.action("#{m}#{text}") }
| "." { @g.dot }
| "@" var:name < nested_paren? > !(- "=")
{ @g.invoke(name, text.empty? ? nil : text) }
| "^" var:name < nested_paren? >
{ @g.foreign_invoke("parent", name, text) }
| "%" var:gram "." var:name < nested_paren? >
{ @g.foreign_invoke(gram, name, text) }
| var:name < nested_paren? > !(- "=")
{ @g.ref(name, nil, text.empty? ? nil : text) }
| char_range
| regexp
| string
# Mandatory filler: one or more whitespace characters or comments.
spaces = (space | comment)+
# A sequence of values separated by mandatory filler. The rule is
# left-recursive: each further value is appended to the sequence built so
# far with @g.seq. A lone value is returned as is.
values = values:s spaces value:v { @g.seq(s, v) }
| value:l spaces value:r { @g.seq(l, r) }
| value
# One further alternative of a choice: a bar, with optional filler on both
# sides, followed by a sequence. Returns that sequence.
choose_cont = - "|" - values:v { v }
# A full expression: either a sequence followed by one or more bar
# alternatives, combined with @g.any, or a single sequence on its own.
expression = values:v choose_cont+:alts { @g.any(v, *alts) }
| values
# Comma-separated list of argument names for a rule definition, returned as
# an array of names. Left-recursive: each further name is appended to the
# array built so far. Filler is allowed around every name.
args = args:a "," - var:n - { a + [n] }
| - var:n - { [n] }
# One top-level statement. Leading filler is skipped in every form.
#   name(args) = expr  -> @g.set(name, expr, args)
#   name = expr        -> @g.set(name, expr)
#   %name = Const      -> @g.add_foreign_grammar(name, Const), where Const is
#                         a run of word characters and colons
#   %% block           -> @g.add_setup(block)
#   %% name block      -> @g.add_directive(name, block)
#   %% name = text     -> @g.set_variable(name, text), where text is
#                         everything up to the end of the line
# In the first form no filler is accepted between the name and the opening
# parenthesis.
statement = - var:v "(" args:a ")" - "=" - expression:o { @g.set(v, o, a) }
| - var:v - "=" - expression:o { @g.set(v, o) }
| - "%" var:name - "=" - < /[:\w]+/ >
{ @g.add_foreign_grammar(name, text) }
| - "%%" - curly:act { @g.add_setup act }
| - "%%" - var:name - curly:act { @g.add_directive name, act }
| - "%%" - var:name - "=" - < (!"\n" .)+ >
{ @g.set_variable(name, text) }
# One or more statements, optionally separated by filler.
statements = statement (- statements)?
# End of input: succeeds only when no character remains.
eof = !.
# Entry rule for a whole grammar file: the statements, trailing filler, an
# optional final comment without a terminating newline, then end of input.
root = statements - eof_comment? eof
# These are a separate set of rules used to parse an ast declaration
# A constant name as text: an uppercase letter followed by word characters.
ast_constant = < /[A-Z]\w*/ > { text }
# An attribute name as text: a letter or underscore followed by word
# characters (matched case-insensitively).
ast_word = < /[a-z_]\w*/i > { text }
# Optional horizontal whitespace: any run of spaces and tabs.
ast_sp = (" " | "\t")*
# Comma-separated list of words, returned as an array. Left-recursive:
# each further word is appended to the array built so far.
ast_words = ast_words:r ast_sp "," ast_sp ast_word:w { r + [w] }
| ast_word:w { [w] }
# Entry rule for an ast declaration. Returns a pair of the constant name and
# the array of words; the array is empty when the constant stands alone or
# is followed by an empty pair of parentheses.
ast_root = ast_constant:c "(" ast_words:w ")" { [c, w] }
| ast_constant:c "()"? { [c, []] }
|