File: munge.rb

package info (click to toggle)
ruby-ruby-parser 3.21.1-1
links: PTS, VCS
area: main
in suites: forky, sid
size: 7,728 kB
sloc: ruby: 136,318; yacc: 6,245; makefile: 11
file content (256 lines) | stat: -rwxr-xr-x 8,599 bytes
#!/usr/bin/env ruby -ws

$v ||= false

stack = []
last_token = nil
reduce_line = nil

def munge s
  renames = [
             "'='",             "tEQL",
             "'!'",             "tBANG",
             "'%'",             "tPERCENT",
             "'&'",             "tAMPER2",
             "'('",             "tLPAREN2",
             "')'",             "tRPAREN",
             "'*'",             "tSTAR2",
             "'+'",             "tPLUS",
             "','",             "tCOMMA",
             "'-'",             "tMINUS",
             "'.'",             "tDOT",
             "'/'",             "tDIVIDE",
             "';'",             "tSEMI",
             "':'",             "tCOLON",
             "'<'",             "tLT",
             "'>'",             "tGT",
             "'?'",             "tEH",
             "'['",             "tLBRACK",
             "'\\n'",           "tNL",
             "']'",             "tRBRACK",
             "'^'",             "tCARET",
             "'`'",             "tBACK_REF2",
             "'{'",             "tLCURLY",
             "'|'",             "tPIPE",
             "'}'",             "tRCURLY",
             "'~'",             "tTILDE",
             '"["',             "tLBRACK",

             # 2.0 changes?
             '"<=>"',            "tCMP",
             '"=="',             "tEQ",
             '"==="',            "tEQQ",
             '"!~"',             "tNMATCH",
             '"=~"',             "tMATCH",
             '">="',             "tGEQ",
             '"<="',             "tLEQ",
             '"!="',             "tNEQ",
             '"<<"',             "tLSHFT",
             '">>"',             "tRSHFT",
             '"*"',              "tSTAR",

             '".."',             "tDOT2",

             '"&"',              "tAMPER",
             '"&&"',             "tANDOP",
             '"&."',             "tLONELY",
             '"||"',             "tOROP",

             '"..."',            "tDOT3",
             '"**"',             "tPOW",
             '"unary+"',         "tUPLUS",
             '"unary-"',         "tUMINUS",
             '"[]"',             "tAREF",
             '"[]="',            "tASET",
             '"::"',             "tCOLON2",
             '"{ arg"',          "tLBRACE_ARG",
             '"( arg"',          "tLPAREN_ARG",
             '"("',              "tLPAREN",
             'rparen',           "tRPAREN",
             '"{"',              "tLBRACE",
             '"=>"',             "tASSOC",
             '"->"',             "tLAMBDA",
             '":: at EXPR_BEG"', "tCOLON3",
             '"**arg"',          "tDSTAR",
             '","',              "tCOMMA",

             # other

             'kTERMINATOR',     "tSTRING_END",
             '"kTERMINATOR"',   "tSTRING_END",
             'kTRCURLY',        "tSTRING_DEND",

             '"symbol literal"',       "tSYMBEG",
             '"string literal"',       "tSTRING_BEG",
             '"backtick literal"',     "tXSTRING_BEG",
             '"regexp literal"',       "tREGEXP_BEG",
             '"word list"',            "tWORDS_BEG",
             '"verbatim word list"',   "tQWORDS_BEG",
             '"symbol list"',          "tSYMBOLS_BEG",
             '"verbatim symbol list"', "tQSYMBOLS_BEG",
             '"terminator"',           "tSTRING_END",
             '"\'}\'"',                  "tSTRING_DEND",

             '"string literal"',"tSTRING_BEG",
             '"literal content"', "tSTRING_CONTENT",
             /\$/,              "", # try to remove these lumps?

             'tLBRACK2',        "tLBRACK", # HACK

             "' '",             "tSPACE", # needs to be later to avoid bad hits

             "/* empty */",     "none",
             /^\s*$/,           "",

             "keyword_BEGIN",   "klBEGIN",
             "keyword_END",     "klEND",
             /keyword_(\w+)/,   proc { "k#{$1.upcase}" },
             /\bk_([a-z_]+)/,   proc { "k#{$1.upcase}" },
             /modifier_(\w+)/,  proc { "k#{$1.upcase}_MOD" },
             "kVARIABLE",       "keyword_variable", # ugh
             "tCONST",          "kCONST",

             # 2.6 collapses klBEGIN to kBEGIN
             "klBEGIN",   "kBEGIN",
             "klEND",     "kEND",

             /keyword_(\w+)/,          proc { "k#{$1.upcase}" },
             /\bk_([^_][a-z_]+)/,      proc { "k#{$1.upcase}" },
             /modifier_(\w+)/,         proc { "k#{$1.upcase}_MOD" },

             "kVARIABLE",       "keyword_variable", # ugh: this is a rule name

             # UGH
             "k_LINE__",       "k__LINE__",
             "k_FILE__",       "k__FILE__",
             "k_ENCODING__",   "k__ENCODING__",

             '"defined?"',     "kDEFINED",

             "<none>",         "none",

             '"do (for condition)"', "kDO_COND",
             '"do (for lambda)"',    "kDO_LAMBDA",
             '"do (for block)"',     "kDO_BLOCK",
             '"local variable or method"', "tIDENTIFIER",

             /\"(\w+) \(modifier\)\"/, proc { |x| "k#{$1.upcase}_MOD" },
             /\"(\w+)\"/,              proc { |x| "k#{$1.upcase}" },
             /\"`(\w+)'\"/,            proc { |x| "k#{$1.upcase}" },

             /@(\d+)(\s+|$)/,       "",
             /\$?@(\d+) */,         "", # TODO: remove?

             /_EXPR/,               "",
            ]

  renames.each_slice(2) do |(a, b)|
    if Proc === b then
      s.gsub!(a, &b)
    else
      s.gsub!(a, b)
    end
  end

  if s.empty? then
    nil
  else
    s.strip.squeeze " "
  end
end

ARGF.each_line do |line|
  case line
  when /^(Stack now|Entering state|Shifting|Cleanup|Starting)/ then
    # do nothing
  when /^vtable_/ then
    # do nothing
  when /Gem::MissingSpecError/ then
    # do nothing -- ruby 2.5 is being bitchy?
  when /^Reading a token: Next token is token (.*?) \(\)/ then
    token = munge $1
    next if last_token == token
    puts "next token is %p" % [token]
    last_token = token
  when /^Reading a token: / then
    next # skip
  when /^Reading a token$/ then # wtf?
    next # skip
  when /^(?:add_delayed_token|parser_dispatch)/ then # dunno what this is yet
    next # skip
  when /^read\s+:(\w+)/ then # read    :tNL(tNL) nil
    token = munge $1
    next if last_token == token
    puts "next token is %p" % [token]
    last_token = token
  when /^Next token is token ("[^"]+"|\S+)/ then
    token = munge $1
    next if last_token == token
    puts "next token is %p" % [token]
    last_token = token
  when /^read\s+false/ then # read    false($end) "$end"
    puts "next token is EOF"
  when /^Now at end of input./ then
    # do nothing
  when /^.:scan=>\["([^"]+)"/ then
    puts "scan = %p" % [$1]
  when /^.:getch=>\["([^"]+)/ then
    puts "SCAN = %p" % [$1]
  when /^Reducing stack by rule (\d+) \(line (\d+)\):/ then
    reduce_line = $2.to_i
  when /^   \$\d+ = (?:token|nterm) (.+) \(.*\)/ then
    item = $1
    stack << munge(item)
  when /^-> \$\$ = (?:token|nterm) (.+) \(.*\)/ then
    stack << "none" if stack.empty?
    item = munge $1
    x = stack.compact.map { |s| munge s.strip }.compact.join " "
    if x != item then # prevent kdef -> kdef
      if $v && reduce_line then
        puts "reduce #{x} --> #{item} at #{reduce_line}".squeeze " "
      else
        puts "reduce #{x} --> #{item}".squeeze " "
      end
      puts
    end
    reduce_line = nil
    stack.clear
  when /^reduce/ then # ruby_parser side
    s = munge line.chomp
    next if s =~ /reduce\s+(\w+) --> \1/
    puts s
    puts
  when /^(\w+_stack)\.(\w+)/ then
    # TODO: make pretty, but still informative w/ line numbers etc
    puts line.gsub("true", "1").gsub("false", "0")
    # puts "#{$1}(#{$2})"
  when /^(\w+_stack(\(\w+\))?: \S+)/ then
    # _data = $v ? line.chomp : $1
    # puts line
    # TODO: make pretty, but still informative w/ line numbers etc
    puts line.gsub("true", "1").gsub("false", "0")
  when /^lex_state: :?([\w|()]+) -> :?([\w|]+)(?: (?:at|from) (.*))?/ then
    a, b, c = $1.upcase, $2.upcase, $3
    a.gsub!(/EXPR_/, "")
    b.gsub!(/EXPR_/, "")
    if c && $v then
      puts "lex_state: #{a} -> #{b} at #{c}"
    else
      puts "lex_state: #{a} -> #{b}"
    end
  when /debug|FUCK/ then
    puts line.chomp
  when /^(#.*parse error|on )/ then
    puts line.chomp
  when /^(goto|shift| +\[|$)/ then # racc
    # do nothing
  # when /^Reading a token: Now at end of input./ then
  #   # puts "EOF"
  # when /^Reading a token: Next token is token (.+)/ then
  #   puts "READ: #{$1.inspect}"
  when /^accept/ then
    puts "DONE"
  else
    puts "unparsed: #{line.chomp}"
  end
end