File: json.py

package info (click to toggle)
funcparserlib 0.3.5-2
  • links: PTS
  • area: main
  • in suites: wheezy
  • size: 396 kB
  • sloc: python: 725; sh: 70; makefile: 37
file content (130 lines) | stat: -rw-r--r-- 3,669 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
# -*- coding: utf-8 -*-

r'''A JSON parser using funcparserlib.

The parser is based on [the JSON grammar][1].

  [1]: http://tools.ietf.org/html/rfc4627
'''

import sys, os, re, logging
from re import VERBOSE
from pprint import pformat
from funcparserlib.lexer import make_tokenizer, Token, LexerError
from funcparserlib.parser import (some, a, maybe, many, finished, skip,
    forward_decl, NoParseError)

ENCODING = 'utf-8'
regexps = {
    'escaped': ur'''
        \\                                  # Escape
          ((?P<standard>["\\/bfnrt])        # Standard escapes
        | (u(?P<unicode>[0-9A-Fa-f]{4})))   # uXXXX
        ''',
    'unescaped': ur'''
        [\x20-\x21\x23-\x5b\x5d-\uffff]     # Unescaped: avoid ["\\]
        ''',
}
re_esc = re.compile(regexps['escaped'], VERBOSE)

def tokenize(str):
    'str -> Sequence(Token)'
    specs = [
        ('Space',  (r'[ \t\r\n]+',)),
        ('String', (ur'"(%(unescaped)s | %(escaped)s)*"' % regexps, VERBOSE)),
        ('Number', (r'''
            -?                  # Minus
            (0|([1-9][0-9]*))   # Int
            (\.[0-9]+)?         # Frac
            ([Ee][+-][0-9]+)?   # Exp
            ''', VERBOSE)),
        ('Op',     (r'[{}\[\]\-,:]',)),
        ('Name',   (r'[A-Za-z_][A-Za-z_0-9]*',)),
    ]
    useless = ['Space']
    t = make_tokenizer(specs)
    return [x for x in t(str) if x.type not in useless]

def parse(seq):
    'Sequence(Token) -> object'
    const = lambda x: lambda _: x
    tokval = lambda x: x.value
    toktype = lambda t: some(lambda x: x.type == t) >> tokval
    op = lambda s: a(Token('Op', s)) >> tokval
    op_ = lambda s: skip(op(s))
    n = lambda s: a(Token('Name', s)) >> tokval
    def make_array(n):
        if n is None:
            return []
        else:
            return [n[0]] + n[1]
    def make_object(n):
        return dict(make_array(n))
    def make_number(n):
        try:
            return int(n)
        except ValueError:
            return float(n)
    def unescape(s):
        std = {
            '"': '"', '\\': '\\', '/': '/', 'b': '\b', 'f': '\f', 'n': '\n',
            'r': '\r', 't': '\t',
        }
        def sub(m):
            if m.group('standard') is not None:
                return std[m.group('standard')]
            else:
                return unichr(int(m.group('unicode'), 16))
        return re_esc.sub(sub, s)
    def make_string(n):
        return unescape(n[1:-1])

    null = n('null') >> const(None)
    true = n('true') >> const(True)
    false = n('false') >> const(False)
    number = toktype('Number') >> make_number
    string = toktype('String') >> make_string
    value = forward_decl()
    member = string + op_(':') + value >> tuple
    object = (
        op_('{') +
        maybe(member + many(op_(',') + member)) +
        op_('}')
        >> make_object)
    array = (
        op_('[') +
        maybe(value + many(op_(',') + value)) +
        op_(']')
        >> make_array)
    value.define(
          null
        | true
        | false
        | object
        | array
        | number
        | string)
    json_text = object | array
    json_file = json_text + skip(finished)

    return json_file.parse(seq)

def loads(s):
    'str -> object'
    return parse(tokenize(s))

def main():
    logging.basicConfig(level=logging.DEBUG)
    try:
        stdin = os.fdopen(sys.stdin.fileno(), 'rb')
        input = stdin.read().decode(ENCODING)
        tree = loads(input)
        print pformat(tree)
    except (NoParseError, LexerError), e:
        msg = (u'syntax error: %s' % e).encode(ENCODING)
        print >> sys.stderr, msg
        sys.exit(1)

if __name__ == '__main__':
    main()