File: generate_javascript_parser_proto.py

package info (click to toggle)

chromium 73.0.3683.75-1~deb9u1

links: PTS, VCS
area: main
in suites: stretch
size: 1,792,156 kB
sloc: cpp: 13,473,466; ansic: 1,577,080; python: 898,539; javascript: 655,737; xml: 341,883; asm: 306,070; java: 289,969; perl: 80,911; objc: 67,198; sh: 43,184; cs: 27,853; makefile: 12,092; php: 11,064; yacc: 10,373; tcl: 8,875; ruby: 3,941; lex: 1,800; pascal: 1,473; lisp: 812; awk: 41; jsp: 39; sed: 19; sql: 3

file content (140 lines) | stat: -rwxr-xr-x 4,229 bytes

parent folder | download | duplicates (3)

#!/usr/bin/env python

# Copyright 2017 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""
Script for generating .proto and a conversion .cc file for a templated library
based JavaScript parser fuzzer.
"""

import sys

def ParseWord(word_string):
  """Parses one dictionary line into the list of parts of a word.

  Every part of the word is either a string surrounded by "" or a placeholder
  $<int>. Leading and trailing whitespace, and whitespace between parts, is
  skipped.

  Args:
    word_string: The text of one line of the dictionary.

  Returns:
    A list with a str for every quoted part (without the quotes) and an int
    for every placeholder, in order of appearance. The list is empty when
    word_string holds only whitespace.

  Raises:
    ValueError: If a part starts with neither '"' nor '$', if a quoted string
      is not terminated, or if a placeholder index is not an integer.
  """
  word_string = word_string.strip()

  parts = []
  while word_string:
    if word_string[0] == '"':
      # There is no escape handling: the string ends at the next '"'.
      end_ix = word_string.find('"', 1)
      if end_ix < 0:
        raise ValueError('Unterminated string in word: %r' % word_string)
      parts.append(word_string[1:end_ix])
      word_string = word_string[(end_ix + 1):]
    elif word_string[0] == '$':
      # The placeholder index runs up to the next space or the end of input.
      end_ix = word_string.find(' ')
      if end_ix < 0:
        end_ix = len(word_string)
      parts.append(int(word_string[1:end_ix]))
      word_string = word_string[end_ix:]
    else:
      # Raise rather than assert: asserts are stripped under -O, which would
      # leave this loop spinning forever on input it never consumes.
      raise ValueError('Unexpected token in word: %r' % word_string)
    word_string = word_string.lstrip()
  return parts

def GenerateProtoContents(words):
  """Builds the enum entries placed inside the generated .proto file.

  Args:
    words: The parsed words; only the number of words is used.

  Returns:
    The concatenation of one '    token_value_<ix> = <ix>;' line per word,
    numbered from 0.
  """
  entry_template = '    token_value_{0} = {0};\n'
  return ''.join(entry_template.format(number)
                 for number in range(len(words)))

def GenerateConversionContents(words):
  """Generates the C++ switch cases that turn a token into its string.

  Args:
    words: List of parsed words. Every word is a list whose str items are
      emitted as string literals and whose other items are emitted as indices
      into token.inner_tokens().

  Returns:
    The C++ text of one 'case <ix>:' per word, where <ix> is the position of
    the word in words. The parts of a word are joined with a single space.
  """
  cases = []
  for ix, word in enumerate(words):
    max_part = -1
    pieces = []
    for part in word:
      if isinstance(part, str):
        pieces.append('std::string("' + part + '")')
      else:
        max_part = max(max_part, part)
        pieces.append('token_to_string(token.inner_tokens(' + str(part) +
                      '), depth)')

    case_text = '    case ' + str(ix) + ':\n'
    if max_part >= 0:
      # Bail out when the token has fewer inner tokens than the word indexes.
      case_text += ('      if (token.inner_tokens().size() < ' +
                    str(max_part + 1) + ') return std::string("");\n')
    # A word without parts would otherwise emit 'return ;', which is invalid
    # in a function returning std::string.
    expression = ' + std::string(" ") + '.join(pieces) or 'std::string("")'
    case_text += '      return ' + expression + ';\n'
    cases.append(case_text)
  return ''.join(cases)

def ReadDictionary(filename):
  """Reads the dictionary file and parses every word in it.

  Lines starting with '#' are skipped, and so are lines for which ParseWord
  returns no parts.

  Args:
    filename: Path of the dictionary file.

  Returns:
    The list of parsed words, in file order.
  """
  parsed_words = []
  with open(filename) as dict_file:
    for raw_line in dict_file:
      if raw_line.startswith('#'):
        continue
      parsed = ParseWord(raw_line)
      if parsed:
        parsed_words.append(parsed)
  return parsed_words

def main(argv):
  """Writes the generated .proto file and the conversion .cc file.

  Args:
    argv: Command line arguments. argv[1] is the path of the .proto file to
      write, argv[2] the path of the .cc file to write and argv[3] the path
      of the dictionary file to read.
  """
  proto_path = argv[1]
  cc_path = argv[2]
  dict_path = argv[3]

  words = ReadDictionary(dict_path)

  # The enum entries generated from the dictionary go between these two parts.
  proto_prologue = ''.join([
      '// Generated by generate_javascript_parser_proto.py.\n',
      '\n',
      'syntax = "proto2";\n',
      'package javascript_parser_proto_fuzzer;\n',
      '\n',
      'message Token {\n',
      '  enum Value {\n',
  ])
  proto_epilogue = ''.join([
      '  }\n',
      '  required Value value = 1;\n',
      '  repeated Token inner_tokens = 2;\n',
      '}\n',
      '\n',
      'message Source {\n',
      '  required bool is_module = 1;\n',
      '  repeated Token tokens = 2;\n',
      '}\n',
  ])
  proto_text = proto_prologue + GenerateProtoContents(words) + proto_epilogue

  with open(proto_path, 'w') as proto_file:
    proto_file.write(proto_text)

  # The switch cases generated from the dictionary go between these two parts.
  cc_prologue = ''.join([
      '// Generated by generate_javascript_parser_proto.py.\n',
      '\n',
      '#include "testing/libfuzzer/fuzzers/'
      'javascript_parser_proto_to_string.h"\n',
      '\n',
      '// Bound calls to token_to_string to prevent memory usage from growing\n',
      '// too much.\n',
      'const int kMaxRecursiveDepth = 9;\n',
      '\n',
      'std::string token_to_string(\n',
      '    const javascript_parser_proto_fuzzer::Token& token, int depth)'
      ' {\n',
      '  if (++depth == kMaxRecursiveDepth) return std::string("");\n',
      '  switch(token.value()) {\n',
  ])
  cc_epilogue = ''.join([
      '    default: break;\n',
      '  }\n',
      '  return std::string("");\n',
      '}\n',
  ])
  cc_text = cc_prologue + GenerateConversionContents(words) + cc_epilogue

  with open(cc_path, 'w') as cc_file:
    cc_file.write(cc_text)

# Run the generator only when this file is executed as a script. The whole
# command line, including the program name at index 0, is handed to main().
if __name__ == "__main__":
  main(sys.argv)