File: json_comment_eater.py

#!/usr/bin/env python3
# Copyright 2012 The Chromium Authors
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

'''Utility to remove comments from JSON files so that they can be parsed by
json.loads.
'''

import sys
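
# Example usage (illustrative sketch; assumes this script is importable as
# json_comment_eater):
#
#   >>> import json
#   >>> from json_comment_eater import Nom
#   >>> json.loads(Nom('{"a": 1, // line comment\n "b": 2}'))
#   {'a': 1, 'b': 2}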


def _Rcount(string, chars):
  '''Returns the number of consecutive characters from |chars| that occur at the
  end of |string|.
  '''
  return len(string) - len(string.rstrip(chars))
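
# Illustrative example: _Rcount('foo!!!', '!') returns 3, while
# _Rcount('foo!!!', 'o') returns 0 because the string does not end with 'o'.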


def _FindNextToken(string, tokens, start):
  '''Finds the next token in |tokens| that occurs in |string| at or after
  |start|. Returns a tuple (index, token key), or (-1, None) if none of the
  tokens occurs.
  '''
  min_index, min_key = (-1, None)
  for k in tokens:
    index = string.find(k, start)
    if index != -1 and (min_index == -1 or index < min_index):
      min_index, min_key = (index, k)
  return (min_index, min_key)
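
# Illustrative example: _FindNextToken('a // b', ('"', '//', '/*'), 0)
# returns (2, '//'); when no token occurs at or after |start| it returns
# (-1, None).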

def _ReadString(input, start, output):
  '''Copies the string literal whose opening '"' immediately precedes |start|
  into |output|, handling backslash escapes, and returns the index just past
  its closing '"'.
  '''
  output.append('"')
  start_range, end_range = (start, input.find('"', start))
  # \" escapes the ", \\" doesn't, \\\" does, etc.
  while (end_range != -1 and
         _Rcount(input[start_range:end_range], '\\') % 2 == 1):
    start_range, end_range = (end_range, input.find('"', end_range + 1))
  if end_range == -1:
    return start_range + 1
  output.append(input[start:end_range + 1])
  return end_range + 1
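
# Illustrative example: comment tokens inside string literals are preserved,
# and escaped quotes do not terminate the string early:
#
#   >>> Nom('{"url": "http://example.com", "say": "\\"hi\\""}')
#   '{"url": "http://example.com", "say": "\\"hi\\""}'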


def _ReadComment(input, start, output):
  '''Skips a // comment beginning just past its '//' at |start|, copying the
  terminating end-of-line character to |output|. Returns the index just past
  that character, or len(input) if the comment runs to the end of the input.
  '''
  eol_tokens = ('\n', '\r')
  eol_token_index, eol_token = _FindNextToken(input, eol_tokens, start)
  if eol_token is None:
    return len(input)
  output.append(eol_token)
  return eol_token_index + len(eol_token)
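
# Illustrative example: a // comment with no trailing newline simply runs to
# the end of the input:
#
#   >>> Nom('{"a": 1} // trailing')
#   '{"a": 1} '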

def _ReadMultilineComment(input, start, output):
  '''Skips a /* */ comment beginning just past its '/*' at |start| and returns
  the index just past the closing '*/'. Nothing is copied to |output|, so
  newlines inside the comment are dropped. Raises if the comment is unclosed.
  '''
  end_tokens = ('*/',)
  end_token_index, end_token = _FindNextToken(input, end_tokens, start)
  if end_token is None:
    raise Exception("Multiline comment end token (*/) not found")
  return end_token_index + len(end_token)
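
# Illustrative example: a block comment is dropped entirely, including any
# newlines inside it, so line numbering after it can shift:
#
#   >>> Nom('{"a": 1 /* multi\n   line */, "b": 2}')
#   '{"a": 1 , "b": 2}'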

def Nom(input):
  '''Returns |input| with // and /* */ comments removed. The contents of
  string literals, including any comment-like tokens inside them, are left
  untouched.
  '''
  token_actions = {
    '"': _ReadString,
    '//': _ReadComment,
    '/*': _ReadMultilineComment,
  }
  output = []
  pos = 0
  while pos < len(input):
    token_index, token = _FindNextToken(input, token_actions.keys(), pos)
    if token is None:
      output.append(input[pos:])
      break
    output.append(input[pos:token_index])
    pos = token_actions[token](input, token_index + len(token), output)
  return ''.join(output)


if __name__ == '__main__':
  sys.stdout.write(Nom(sys.stdin.read()))