File: optimize_parser.rb

package info (click to toggle)
jruby 9.4.8.0%2Bds-3
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 89,244 kB
  • sloc: ruby: 548,574; java: 276,189; yacc: 25,873; ansic: 6,178; xml: 6,111; sh: 1,855; sed: 94; makefile: 78; jsp: 48; tcl: 40; exp: 12
file content (141 lines) | stat: -rw-r--r-- 3,903 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
# Post-processes a generated parser source file.
#
# The whole input is read up front and then walked line by line.  Lines are
# copied to +out+ unchanged except for three kinds of marker-delimited
# regions (see #translate):
#
# * a text-substitution block, which is evaluated as Ruby and not emitted;
# * the action block, whose +case N:+ bodies are collected and not emitted;
# * the action-bodies marker, which is replaced by a generated static table
#   holding one lambda-style entry per collected case body.
class PostProcessor
  # source::    path of the file to process (read eagerly with File.readlines).
  # is_parser:: true when generating the regular parser, false for the Ripper
  #             parser.  Selects both the substitution column and the parser
  #             class name used in the generated table.
  # out::       any object responding to +puts+; receives the rewritten text.
  def initialize(source, is_parser = true, out = STDOUT)
    @out = out
    @lines = File.readlines(source)
    @index = -1
    @last = nil
    @case_bodies = {}
    @max_case_number = -1
    # Which of the two substitution values applies (0 = parser, 1 = Ripper).
    # Nothing else in this class reads it.
    @sub_type_index = is_parser ? 0 : 1
    # Derived from the constructor argument rather than from a global
    # constant so the class works without any surrounding script.
    @parser_name = is_parser ? "RubyParser" : "RipperParser"
  end

  # Read/Unread with ability to push back one line for a single lookahead.
  #
  # Returns the next line (newline included), the pushed-back line if there
  # is one, or nil once the input is exhausted.
  def read
    @index += 1
    line = @last || @lines[@index]
    @last = nil
    line
  end

  # Pushes +line+ back so that the next #read returns it again.  Only a
  # single line of lookahead is supported.
  def unread(line)
    @index -= 1
    @last = line
  end

  # True when +line+ is the marker closing the action block.  Safe to call
  # with nil (end of input), in which case it is false.
  def end_of_actions?(line)
    %r{^//\s*ACTIONS_END}.match?(line)
  end

  # Main loop: dispatches on marker lines and copies every other line to the
  # output unchanged.
  def translate
    while (line = read)
      case line
      when %r{^/\*@@=}            then read_text_substitutions
      when %r{^//\s*ACTIONS_BEGIN} then translate_actions
      when %r{^//\s*ACTION_BODIES} then generate_action_body_methods
      else @out.puts line
      end
    end
  end

  # We define substitutions at the top of the file where a constant
  # named SUBS contains a key which represents a substitution and two values
  # where the first value is what is substituted when writing the Parser
  # and the second value is what is substituted when writing Ripper Parser.
  #
  # Any reference to @@name@@ is meant to be replaced with the first or
  # second value later on in the grammar file.  NOTE(review): no code in this
  # class performs that replacement; presumably it happens elsewhere.
  #
  # Consumes lines up to and including the closing marker and evaluates
  # everything in between as Ruby.
  def read_text_substitutions
    snippet = []
    while (line = read)
      break if %r{^=@@\*/}.match?(line)
      snippet << line
    end
    # Reads in substitutions into the constant SUBS.
    # SECURITY: this executes text taken verbatim from the input file, so
    # only run this tool on trusted grammar files.
    eval snippet.join
  end

  # Emits the static table declaration followed by one entry per collected
  # case body.  The table is sized to hold the highest case number seen.
  def generate_action_body_methods
    @out.puts "static ParserState<#{@parser_name}>[] states = new ParserState[#{@max_case_number + 1}];"
    @out.puts "static {"
    @case_bodies.each do |state, (code_body, line_number)|
      generate_action_body_method(state, code_body, line_number, @parser_name)
    end
    @out.puts "}"
  end

  # Emits a single table entry for +state+ whose body is +code_body+ (an
  # array of source lines) followed by a +return yyVal;+.
  #
  # +line_number+ is currently unused; it is kept for the optional
  # "// line: N" debugging output below.
  def generate_action_body_method(state, code_body, line_number, parser_name)
#    @out.puts "// line: #{line_number}"
    @out.puts "states[#{state}] = (#{parser_name} p, Object yyVal, ProductionState[] yyVals, int yyTop, int count, int yychar) -> {"
    code_body.each { |line| @out.puts frob_yyVals(line) }
    @out.puts "  return yyVal;"
    @out.puts "};"
  end

  # Rewrites value-stack references in one line of an action body:
  #
  # * <tt>yyVals[expr]</tt> becomes <tt>yyVals[expr].value</tt>;
  # * <tt>@{num}</tt> becomes <tt>yyVals[yyTop - count + num]</tt>.
  #
  # @{num} allows us to get direct access to the production state for that
  # production or token.  This is used for specialized reporting in syntax
  # error messages where we want to highlight a specific region.
  #
  # The order matters: the @{num} expansion runs second so that the yyVals
  # reference it introduces does not get a +.value+ suffix.
  def frob_yyVals(line)
    line
      .gsub(/yyVals\[([^\]]+)\]/, 'yyVals[\1].value')
      .gsub(/@(\d+)/, 'yyVals[yyTop - count + \1]')
  end

  # Collects consecutive actions until #translate_action reports that there
  # are no more.
  def translate_actions
    loop { break unless translate_action }
  end

  # Reads one action and records its body under its case number.
  #
  # Returns true when an action was recorded, false when the next lines do
  # not form an action (including the end-of-actions marker and end of
  # input).  Lines consumed while deciding are not emitted.
  #
  # Assumptions:
  # 1. no break; in our code.  A bit weak, but this is highly specialized code.
  # 2. All productions will have a line containing only { (with optional comment)
  # 3. All productions will end with a line containing only } followed by
  #    the break; of assumption 1.
  def translate_action
    line = read
    return false if end_of_actions?(line)

    case_match = /case\s+(\d+):/.match(line)
    return false unless case_match
    case_number = case_match[1].to_i

    line_match = /line\s+(\d+)/.match(read)
    return false unless line_match
    line_number = line_match[1]

    # Extra boiler plate '{' that we do not need
    return false unless /^\s*\{\s*(\/\*.*\*\/)?$/.match?(read)

    @max_case_number = [@max_case_number, case_number].max

    body = []
    while (line = read)
      if /^\s*\}\s*$/.match?(line)
        # A lone '}' is the trailing boiler plate only when the following
        # line is the break; otherwise it belongs to the body.
        next_line = read
        break if /break;/.match?(next_line)

        body << line
        unread next_line
      else
        body << line
      end
    end

    @case_bodies[case_number] = [body, line_number]
    true
  end
end

# The Ripper flavour is selected purely from the name of the input file
# given as the first command-line argument.
RIPPER = (ARGV[0] =~ /(ripper_|Ripper)/) ? true : false
$stderr.puts "RIPPER: #{RIPPER}"

# Rewrite the file named by the first argument, writing to standard output.
source_path = ARGV.shift
PostProcessor.new(source_path, !RIPPER).translate