File: format_parser.kpeg

package info (click to toggle)
ruby-kpeg 1.3.3-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 608 kB
  • sloc: ruby: 11,839; makefile: 10
file content (141 lines) | stat: -rw-r--r-- 5,251 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
# Parser-wide settings.  Each "%% key = value" line is stored as a grammar
# variable whose value is the rest of the line, so comments must stay on
# their own lines here.
#
# name: NOTE(review): presumably the class name of the generated parser -
# confirm against the kpeg README.
%% name = KPeg::FormatParser
# NOTE(review): presumably tells kpeg not to generate a default initialize,
# because the setup block below defines its own - confirm.
%% custom_initialize = true

# Directive named "pre-class" with a code body.  NOTE(review): presumably
# emitted ahead of the generated class so that KPeg::Grammar is loaded
# before the parser uses it - confirm.
%% pre-class {
require 'kpeg/grammar'
}

%% {

    ##
    # Builds a format parser over the grammar source +str+.  +debug+ is
    # handed to setup_parser along with +str+.

    def initialize(str, debug = false)
      setup_parser(str, debug)
      @g = KPeg::Grammar.new
    end

    ##
    # The KPeg::Grammar instance that the rule actions call into.

    attr_reader :g

    alias grammar g
}


             # Lexical helpers: line endings, comments and whitespace.
             #
             # eol         - a bare newline.
             # eof_comment - "#" followed by everything up to the end of input;
             #               root allows one after the trailing filler, which
             #               covers a final comment with no closing newline.
             # comment     - "#" through the end of the line, newline included.
             # space       - one space, tab or newline.
             # -           - any run (possibly empty) of whitespace and comments.
             # kleene      - the literal "*", accepted by range_elem.
             eol = "\n"
     eof_comment = "#" (!eof .)*

         comment = "#" (!eol .)* eol
           space = " " | "\t" | eol
               - = (space | comment)*
          kleene = "*"

                   # Identifier used for rule names, labels, parameters and
                   # directive names: either "-" on its own, or a letter
                   # followed by word characters and hyphens (so "-" is allowed
                   # by itself or inside a name, but not at the beginning).
             var = < "-" | /[a-z][\w-]*/i > { text }
                   # Method name used after "~": a letter or underscore, then
                   # word characters.
          method = < /[a-z_]\w*/i > { text }

     # String literals.
     #
     # dbl_escapes handles the character that follows a backslash inside a
     # double-quoted string and yields the text it stands for.  The named
     # escapes are tried first, then the numeric ones; any other character
     # stands for itself.
     dbl_escapes = "n" { "\n" }
                 | "s" { " " }
                 | "r" { "\r" }
                 | "t" { "\t" }
                 | "v" { "\v" }
                 | "f" { "\f" }
                 | "b" { "\b" }
                 | "a" { "\a" }
                 | "e" { "\e" }
                 | "\\" { "\\" }
                 | "\"" { "\"" }
                 | num_escapes
                 | < . > { text }
     # Numeric escapes: one to three octal digits, or "x" plus exactly two
     # hex digits.  The number is turned into a character with pack("U").
     num_escapes = < /[0-7]{1,3}/ > { [text.to_i(8)].pack("U") }
                 | "x" < /[a-f\d]{2}/i > { [text.to_i(16)].pack("U") }
                 # TODO use /\h{2}/ after 1.8 support is dropped
         # A run of characters that are neither a backslash nor a double quote.
         dbl_seq = < /[^\\"]+/ > { text }
   # Body of a double-quoted string: escapes and plain runs, collected and
   # passed through Array() before dbl_string joins them.
   dbl_not_quote = ("\\" dbl_escapes | dbl_seq)*:ary { Array(ary) }
      dbl_string = "\"" dbl_not_quote:s "\"" { @g.str(s.join) }
# Single-quoted strings have one escape, \' for a quote.  It is tried before
# sgl_seq, and sgl_seq takes a single character at a time so the escape is
# checked at every position.  A backslash not followed by a quote is kept.
sgl_escape_quote = "\\'" { "'" }
         sgl_seq = < /[^']/ > { text }
   sgl_not_quote = (sgl_escape_quote | sgl_seq)*:segs { Array(segs) }
      sgl_string = "'" sgl_not_quote:s "'" { @g.str(s.join) }
          # Either quoting style; both produce the result of @g.str.
          string = dbl_string
                 | sgl_string

       # Regexp literals: "/" body "/" followed by optional lowercase option
       # letters.  Inside the body an escaped slash (\/) does not end the
       # literal.  The captured body text, escapes included, and the option
       # letters are handed to @g.reg.
       not_slash = < ("\\/" | /[^\/]/)+ > { text }
     regexp_opts = < [a-z]* > { text }
          regexp = "/" not_slash:body "/" regexp_opts:opts
                   { @g.reg body, opts }

            # Character ranges such as [a-z]: exactly one ASCII letter or digit
            # on each side of the "-", passed to @g.range as strings.
            char = < /[a-z\d]/i > { text }
      char_range = "[" char:l "-" char:r "]" { @g.range(l,r) }

       # Repetition bounds written after a term.
       #
       # range_num  - a positive integer without leading zeros.
       # range_elem - a range_num or "*".
       # mult_range - "[lo, hi]" yields a two element array in which a "*"
       #              bound becomes nil and a number becomes an Integer;
       #              "[n]" yields [n, n].
       range_num = < /[1-9]\d*/ > { text }
      range_elem = < range_num | kleene > { text }
      mult_range = "[" - range_elem:lo - "," - range_elem:hi - "]"
                   { [lo, hi].map { |bound| bound == "*" ? nil : bound.to_i } }
                 | "[" - range_num:count - "]" { [count.to_i] * 2 }

     # Brace-delimited code.  curly captures the text between the outer
     # braces and wraps it with @g.action.  Each repetition inside tries, in
     # order: whitespace and comments, a run of characters other than braces
     # and quotes, a string literal, then a nested curly.  String literals
     # and nested braces are therefore consumed as units, so a brace inside
     # a string does not end the block.
     curly_block = curly
           curly = "{" < (spaces | /[^{}"']+/ | string | curly)* > "}" { @g.action(text) }
    # Parenthesised text with the same treatment of strings and nesting.  It
    # has no action of its own; the value rule captures it with < >.
    nested_paren = "(" (/[^()"']+/ | string | nested_paren)* ")"

           # A single grammar term.  The alternatives are ordered.  The first
           # five are left-recursive and wrap an already parsed term: a label
           # (term:name), "?", "+", "*" and a mult_range such as [1, 3].
           # Then come the prefix forms "&" and "!", grouping with
           # parentheses, the "@< >" and "< >" captures, code blocks, "~"
           # method actions, ".", rule invocations ("@name", "^name",
           # "%gram.name"), plain rule references, and finally the literal
           # forms: character range, regexp and string.
           #
           # The !(- "=") lookahead on "@name" and on a bare name keeps a term
           # from consuming the name that starts the next rule definition.
           value = value:v ":" var:n { @g.t(v,n) }
                 | value:v "?" { @g.maybe(v) }
                 | value:v "+" { @g.many(v) }
                 | value:v "*" { @g.kleene(v) }
                 | value:v mult_range:r { @g.multiple(v, *r) }
                 | "&" value:v { @g.andp(v) }
                 | "!" value:v { @g.notp(v) }
                 | "(" - expression:o - ")" { o }
                 | "@<" - expression:o - ">" { @g.bounds(o) }
                 | "<" - expression:o - ">" { @g.collect(o) }
                 | curly_block
                 | "~" method:m < nested_paren? >
                   { @g.action("#{m}#{text}") }
                 | "." { @g.dot }
                 | "@" var:name < nested_paren? > !(- "=")
                   { @g.invoke(name, text.empty? ? nil : text) }
                 | "^" var:name < nested_paren? >
                   { @g.foreign_invoke("parent", name, text) }
                 | "%" var:gram "." var:name < nested_paren? >
                   { @g.foreign_invoke(gram, name, text) }
                 | var:name < nested_paren? > !(- "=")
                   { @g.ref(name, nil, text.empty? ? nil : text) }
                 | char_range
                 | regexp
                 | string

          # Sequencing and choice.
          #
          # spaces      - at least one whitespace character or comment.
          # values      - terms separated by spaces, combined pairwise from the
          #               left with @g.seq (left-recursive); a lone term is
          #               returned as-is.
          # choose_cont - one "| values" continuation of a choice.
          # expression  - values followed by one or more continuations, all
          #               handed to @g.any; otherwise plain values.
          spaces = (space | comment)+
          values = values:s spaces value:v { @g.seq(s, v) }
                 | value:l spaces value:r  { @g.seq(l, r) }
                 | value
     choose_cont = - "|" - values:v { v }
      expression = values:v choose_cont+:alts { @g.any(v, *alts) }
                 | values
            # Comma-separated parameter names of a rule definition, collected
            # into an array of strings (left-recursive).  Filler is allowed
            # around each name.
            args = args:a "," - var:n - { a + [n] }
                 | - var:n - { [n] }
       # One top-level statement.  The forms are tried in this order:
       #   name(args) = expression   -> @g.set(name, expression, args)
       #   name = expression         -> @g.set(name, expression)
       #   %name = Some::Grammar     -> @g.add_foreign_grammar
       #   %% { code }               -> @g.add_setup
       #   %% name { code }          -> @g.add_directive
       #   %% name = rest of line    -> @g.set_variable
       # In the first form the "(" must follow the name directly.
       statement = - var:v "(" args:a ")" - "=" - expression:o { @g.set(v, o, a) }
                 | - var:v - "=" - expression:o { @g.set(v, o) }
                 | - "%" var:name - "=" - < /[:\w]+/ >
                   { @g.add_foreign_grammar(name, text) }
                 | - "%%" - curly:act { @g.add_setup act }
                 | - "%%" - var:name - curly:act { @g.add_directive name, act }
                 | - "%%" - var:name - "=" - < (!"\n" .)+ >
                   { @g.set_variable(name, text) }
      # statements is one statement followed by any further ones.  eof
      # succeeds only when no input is left.  root ties them together: all
      # statements, trailing filler, an optional eof_comment, then the end
      # of input.
      statements = statement (- statements)?
             eof = !.
            root = statements - eof_comment? eof

# These are a separate set of rules used to parse an AST declaration

    # ast_constant - an identifier starting with an uppercase ASCII letter.
    # ast_word     - a letter or underscore followed by word characters.
    ast_constant = < /[A-Z]\w*/ > { text }
        ast_word = < /[a-z_]\w*/i > { text }

          # Optional run of spaces and tabs.
          ast_sp = (" " | "\t")*

       # Comma-separated words, gathered into an array (left-recursive).
       ast_words = ast_words:list ast_sp "," ast_sp ast_word:word { [*list, word] }
                 | ast_word:word { [word] }

        # Constant(word, ...) yields [constant, words]; a constant alone or
        # followed by "()" yields [constant, []].
        ast_root = ast_constant:const "(" ast_words:fields ")" { [const, fields] }
                 | ast_constant:const "()"? { [const, []] }