File: format_parser.kpeg

package info (click to toggle)
ruby-kpeg 1.3.3-1
links: PTS, VCS
area: main
in suites: forky, sid, trixie
size: 608 kB
sloc: ruby: 11,839; makefile: 10
file content (141 lines) | stat: -rw-r--r-- 5,251 bytes
%% name = KPeg::FormatParser
%% custom_initialize = true

%% pre-class {
require 'kpeg/grammar'
}

%% {

    ##
    # Builds a format parser over +str+. Both arguments are handed to
    # setup_parser unchanged, then a fresh KPeg::Grammar is stored in @g
    # for the rule actions to call into.

    def initialize(str, debug = false)
      setup_parser(str, debug)
      @g = KPeg::Grammar.new
    end

    ##
    # The grammar object held in @g, also readable as #grammar.

    attr_reader :g
    alias grammar g
}


             # Lexical building blocks: line ends, comments, whitespace and
             # the two identifier forms (var and method).
             eol = "\n"
                   # A "#" comment that runs to end of input instead of to a newline
     eof_comment = "#" (!eof .)*

                   # A "#" comment terminated by a newline
         comment = "#" (!eol .)* eol
           space = " " | "\t" | eol
                   # Optional run of whitespace and comments (may match nothing)
               - = (space | comment)*
          kleene = "*"

                   # Allow - by itself, but not at the beginning
             var = < "-" | /[a-z][\w-]*/i > { text }
                   # Like var but without "-": word characters only, and the
                   # first character may be "_"
          method = < /[a-z_]\w*/i > { text }

     # The character(s) following a backslash inside a double-quoted
     # string, mapped to the text they stand for. Alternatives are tried
     # in order; a character that matches none of the named escapes falls
     # through to the last alternative and is returned unchanged.
     dbl_escapes = "n" { "\n" }
                 | "s" { " " }
                 | "r" { "\r" }
                 | "t" { "\t" }
                 | "v" { "\v" }
                 | "f" { "\f" }
                 | "b" { "\b" }
                 | "a" { "\a" }
                 | "e" { "\e" }
                 | "\\" { "\\" }
                 | "\"" { "\"" }
                 | num_escapes
                 | < . > { text }
                 # Numeric escapes: one to three octal digits, or "x" followed
                 # by exactly two hex digits. The number is turned into a
                 # character with pack("U").
     num_escapes = < /[0-7]{1,3}/ > { [text.to_i(8)].pack("U") }
                 | "x" < /[a-f\d]{2}/i > { [text.to_i(16)].pack("U") }
                 # TODO use /\h{2}/ after 1.8 support is dropped
                 # A run of characters containing neither backslash nor double quote
         dbl_seq = < /[^\\"]+/ > { text }
                 # Body of a double-quoted string as an array of pieces.
                 # NOTE(review): Array(ary) presumably normalises the result
                 # when nothing matched - confirm against kpeg's handling of *
   dbl_not_quote = ("\\" dbl_escapes | dbl_seq)*:ary { Array(ary) }
                 # Double-quoted literal; the pieces are joined and passed to @g.str
      dbl_string = "\"" dbl_not_quote:s "\"" { @g.str(s.join) }
                 # Inside single quotes the only escape is backslash-quote, which
                 # yields a bare quote; any other character is taken one at a time.
sgl_escape_quote = "\\'" { "'" }
         sgl_seq = < /[^']/ > { text }
   sgl_not_quote = (sgl_escape_quote | sgl_seq)*:segs { Array(segs) }
                 # Single-quoted literal; the pieces are joined and passed to @g.str
      sgl_string = "'" sgl_not_quote:s "'" { @g.str(s.join) }
          string = dbl_string
                 | sgl_string

       # Regexp literal: /body/opts.
       # The body is one or more of either an escaped slash (backslash-slash)
       # or any character other than "/"; the captured text is passed on
       # as written, escapes included.
       not_slash = < ("\\/" | /[^\/]/)+ > { text }
                   # Zero or more lowercase letters after the closing slash (may be empty)
     regexp_opts = < [a-z]* > { text }
          regexp = "/" not_slash:body "/" regexp_opts:opts
                   { @g.reg body, opts }

            # Character ranges and repetition bounds, both written in
            # square brackets.
            # char is a single letter or digit (case-insensitive).
            char = < /[a-z\d]/i > { text }
                   # [l-r] with single-character endpoints, handed to @g.range
      char_range = "[" char:l "-" char:r "]" { @g.range(l,r) }

                   # A positive integer with no leading zero, kept as text
       range_num = < /[1-9]\d*/ > { text }
                   # One bound of a repetition: a number or "*"
      range_elem = < range_num|kleene > { text }
                   # Repetition bounds. Yields a two-element array:
                   #   [l, r] - each side converted with to_i, or nil when it is "*"
                   #   [e]    - both elements set to e.to_i
      mult_range = "[" - range_elem:l - "," - range_elem:r - "]"
                   { [l == "*" ? nil : l.to_i, r == "*" ? nil : r.to_i] }
                 | "[" - range_num:e - "]" { [e.to_i, e.to_i] }

     # Brace- and parenthesis-delimited blocks.
     # curly_block simply delegates to curly.
     curly_block = curly
                   # A "{" ... "}" block. The captured text is everything between
                   # the outer braces and is passed to @g.action. Quoted strings
                   # are consumed by the string rule and inner braces by recursion,
                   # so a brace inside either does not close the block.
           curly = "{" < (spaces | /[^{}"']+/ | string | curly)* > "}" { @g.action(text) }
                   # Balanced parentheses, with quoted strings and nesting handled
                   # the same way. There is no action here; rules that use it wrap
                   # it in < > to capture the text.
    nested_paren = "(" (/[^()"']+/ | string | nested_paren)* ")"

           # A single grammar term. Alternatives are tried in order.
           # The first five are left-recursive: they take an already-parsed
           # value and wrap it with a name after ":", "?", "+", "*" or a
           # bracketed repetition range. The remaining alternatives are the
           # prefix operators, grouping/capture forms, actions, invocations,
           # rule references and literals.
           value = value:v ":" var:n { @g.t(v,n) }
                 | value:v "?" { @g.maybe(v) }
                 | value:v "+" { @g.many(v) }
                 | value:v "*" { @g.kleene(v) }
                 | value:v mult_range:r { @g.multiple(v, *r) }
                 | "&" value:v { @g.andp(v) }
                 | "!" value:v { @g.notp(v) }
                 | "(" - expression:o - ")" { o }
                 | "@<" - expression:o - ">" { @g.bounds(o) }
                 | "<" - expression:o - ">" { @g.collect(o) }
                 | curly_block
                 # "~" method with optional parenthesised arguments becomes an
                 # action whose text is the method name plus the captured parens
                 | "~" method:m < nested_paren? >
                   { @g.action("#{m}#{text}") }
                 | "." { @g.dot }
                 # The !(- "=") lookahead rejects a name that is followed by "=",
                 # i.e. one that begins a definition (see statement) rather than
                 # being used as a term. An empty capture is passed on as nil.
                 | "@" var:name < nested_paren? > !(- "=")
                   { @g.invoke(name, text.empty? ? nil : text) }
                 # "^" invokes a rule through the foreign grammar named "parent"
                 | "^" var:name < nested_paren? >
                   { @g.foreign_invoke("parent", name, text) }
                 # "%gram.name" invokes a rule in the named foreign grammar
                 | "%" var:gram "." var:name < nested_paren? >
                   { @g.foreign_invoke(gram, name, text) }
                 # Plain rule reference, with the same "=" lookahead as "@" above
                 | var:name < nested_paren? > !(- "=")
                   { @g.ref(name, nil, text.empty? ? nil : text) }
                 | char_range
                 | regexp
                 | string

          # Sequences and ordered choice.
          # spaces is a mandatory separator: at least one whitespace
          # character or comment.
          spaces = (space | comment)+
                 # One or more values separated by spaces. Two or more are
                 # combined pairwise with @g.seq (left-recursively); a lone
                 # value is returned as-is.
          values = values:s spaces value:v { @g.seq(s, v) }
                 | value:l spaces value:r  { @g.seq(l, r) }
                 | value
                 # One further alternative of a choice: "|" followed by values
     choose_cont = - "|" - values:v { v }
                 # values followed by one or more "|" alternatives is passed to
                 # @g.any; without any "|" the values result is used directly.
      expression = values:v choose_cont+:alts { @g.any(v, *alts) }
                 | values
            # Top-level structure of a grammar file.
            # args is a comma-separated list of names, returned as an array.
            args = args:a "," - var:n - { a + [n] }
                 | - var:n - { [n] }
                 # One top-level statement. In order:
                 #   name(args) = expression   rule definition with arguments
                 #   name = expression         rule definition
                 #   %name = Some::Const       foreign grammar declaration
                 #   %% { ... }                setup block
                 #   %% name { ... }           named directive block
                 #   %% name = rest-of-line    variable; value runs to end of line
       statement = - var:v "(" args:a ")" - "=" - expression:o { @g.set(v, o, a) }
                 | - var:v - "=" - expression:o { @g.set(v, o) }
                 | - "%" var:name - "=" - < /[:\w]+/ >
                   { @g.add_foreign_grammar(name, text) }
                 | - "%%" - curly:act { @g.add_setup act }
                 | - "%%" - var:name - curly:act { @g.add_directive name, act }
                 | - "%%" - var:name - "=" - < (!"\n" .)+ >
                   { @g.set_variable(name, text) }
                 # One or more statements
      statements = statement (- statements)?
                 # Succeeds only when no input is left
             eof = !.
                 # Whole file: statements, optional trailing whitespace and
                 # comments, an optional final comment that has no newline
                 # after it, then end of input.
            root = statements - eof_comment? eof

# These are a separate set of rules used to parse an ast declaration

    # Rules for an ast declaration of the form Constant(word, word, ...).
    # ast_constant is an identifier starting with an uppercase letter.
    ast_constant = < /[A-Z]\w*/ > { text }
                 # An identifier starting with a letter or underscore
        ast_word = < /[a-z_]\w*/i > { text }

                 # Optional spaces and tabs only; newlines are not accepted
          ast_sp = (" " | "\t")*

                 # Comma-separated words, returned as an array of strings
       ast_words = ast_words:r ast_sp "," ast_sp ast_word:w { r + [w] }
                 | ast_word:w { [w] }

                 # Yields [constant, words]. With empty parentheses, or with
                 # none at all, the word list is an empty array.
        ast_root = ast_constant:c "(" ast_words:w ")" { [c, w] }
                 | ast_constant:c "()"? { [c, []] }