1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141
|
# Rewrites a generated parser source file. Lines are copied through to the
# output unchanged except for three marker-delimited regions:
#
# * a "/*@@=" ... "=@@*/" block, whose contents are evaluated as Ruby code;
# * a "// ACTIONS_BEGIN" ... "// ACTIONS_END" block, whose "case N:" action
#   bodies are collected (and not copied to the output);
# * a "// ACTION_BODIES" line, which is replaced by a Java static initializer
#   holding one lambda per collected action body.
class PostProcessor
  # source    - path of the file to process; it is read fully up front.
  # is_parser - true when generating the regular parser, false when
  #             generating the Ripper parser.
  # out       - object receiving the output through #puts.
  def initialize(source, is_parser = true, out=STDOUT)
    @out = out
    @lines = File.readlines(source)
    @index = -1
    @last = nil
    @case_bodies = {}
    @max_case_number = -1
    # Remembered so the generated Java type names follow the constructor
    # argument rather than a constant defined outside this class.
    @is_parser = is_parser
    # Position within a SUBS value pair (0 = Parser, 1 = Ripper Parser).
    # NOTE(review): nothing in this class reads it; kept in case evaluated
    # substitution code or external tooling relies on it.
    @sub_type_index = is_parser ? 0 : 1
  end

  # Read/Unread with ability to push back one line for a single lookahead.
  #
  # Returns the pushed-back line if there is one, otherwise the next line of
  # the source, or nil once the source is exhausted.
  def read
    @index += 1
    line = @last || @lines[@index]
    @last = nil
    line
  end

  # Pushes +line+ back so that the next call to #read returns it again.
  def unread(line)
    @index -= 1
    @last = line
  end

  # Truthy when +line+ is the marker closing the actions region.
  def end_of_actions?(line)
    line =~ %r{^//\s*ACTIONS_END}
  end

  # Main loop: dispatches on the marker lines and echoes every other line.
  def translate
    while (line = read)
      case line
      when %r{^/\*@@=}
        read_text_substitutions
      when %r{^//\s*ACTIONS_BEGIN}
        translate_actions
      when %r{^//\s*ACTION_BODIES}
        generate_action_body_methods
      else
        @out.puts line
      end
    end
  end

  # We define substitutions at the top of the file where a constant
  # named SUBS contains a key which represents a substitution and two values
  # where the first value is what is substituted when writing the Parser
  # and the second value is what is substituted when writing Ripper Parser.
  #
  # Any reference to @@name@@ will be replaced with first or second value
  # later on in the grammar file.
  def read_text_substitutions
    code = ''
    while (line = read)
      break if line =~ %r{^=@@\*/}
      code << line
    end

    # Reads in substitutions into the constant SUBS.
    # NOTE(review): this executes arbitrary Ruby taken from the input file,
    # so the input must be trusted.
    eval code
  end

  # Emits the Java "states" array declaration followed by a static
  # initializer containing one lambda per collected action body. The array is
  # sized to hold the highest case number seen.
  def generate_action_body_methods
    parser_name = @is_parser ? "RubyParser" : "RipperParser"

    @out.puts "static ParserState<#{parser_name}>[] states = new ParserState[#{@max_case_number+1}];"
    @out.puts "static {"
    @case_bodies.each do |state, (code_body, line_number)|
      generate_action_body_method(state, code_body, line_number, parser_name)
    end
    @out.puts "}"
  end

  # Emits the lambda assigned to states[state]; every body line is passed
  # through #frob_yyVals first. +line_number+ is currently only used by the
  # commented-out debugging aid below.
  def generate_action_body_method(state, code_body, line_number, parser_name)
    # @out.puts "// line: #{line_number}"
    @out.puts "states[#{state}] = (#{parser_name} p, Object yyVal, ProductionState[] yyVals, int yyTop, int count, int yychar) -> {"
    code_body.each { |line| @out.puts frob_yyVals(line) }
    @out.puts " return yyVal;"
    @out.puts "};"
  end

  # Rewrites value-stack references in one line of action code:
  #
  # * yyVals[expr] becomes yyVals[expr].value
  # * @{num} allows us to get direct access to the production state for that
  #   production or token. This is used for specialized reporting in syntax
  #   error messages where we want to highlight a specific region.
  #
  # The @{num} rewrite runs second so the references it produces do not get
  # ".value" appended.
  def frob_yyVals(line)
    line
      .gsub(/yyVals\[([^\]]+)\]/, 'yyVals[\1].value')
      .gsub(/@(\d+)/, 'yyVals[yyTop - count + \1]')
  end

  # Consumes consecutive actions until #translate_action reports that the
  # next line does not start one.
  def translate_actions
    loop { break unless translate_action }
  end

  # Reads a single "case N:" action and records its body.
  #
  # Assumptions:
  # 1. no break; in our code. A bit weak, but this is highly specialized code.
  # 2. All productions will have a line containing only { (with optional comment)
  # 3. All productions will end with a line containing only } followed by the
  #    break from assumption 1.
  #
  # Returns true when an action was recorded, false when the lines read do not
  # form the start of an action (end marker, end of input, or unexpected
  # text). Lines consumed before returning false are not written out.
  def translate_action
    line = read
    return false if line.nil? || end_of_actions?(line)

    case_text = line[/case\s+(\d+):/, 1]
    return false unless case_text
    case_number = case_text.to_i

    # to_s lets a nil at end of input fall through as "no match".
    line_number = read.to_s[/line\s+(\d+)/, 1]
    return false unless line_number

    # Extra boiler plate '{' that we do not need
    return false unless read.to_s =~ /^\s*\{\s*(\/\*.*\*\/)?$/

    @max_case_number = case_number if case_number > @max_case_number

    body = []
    while (line = read)
      if line =~ /^\s*\}\s*$/ # Extra trailing boiler plate
        next_line = read
        break if next_line =~ /break;/

        # A lone '}' not followed by break belongs to the action itself.
        body << line
        unread next_line
      else
        body << line
      end
    end

    @case_bodies[case_number] = [body, line_number]
    true
  end
end
# Script entry point. Ripper mode is selected when the first command-line
# argument contains "ripper_" or "Ripper"; the chosen mode is reported on
# stderr, then the file named by that argument is translated to STDOUT.
RIPPER = !!(ARGV[0] =~ /(ripper_|Ripper)/)
$stderr.puts "RIPPER: #{RIPPER}"
source_path = ARGV.shift
PostProcessor.new(source_path, !RIPPER).translate
|