##############################################################################
#
# Copyright (c) 2006 Zope Corporation and Contributors.
# All Rights Reserved.
#
# This software is subject to the provisions of the Zope Public License,
# Version 2.1 (ZPL).  A copy of the ZPL should accompany this distribution.
# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
# FOR A PARTICULAR PURPOSE.
#
##############################################################################

# minjson.py
# reads minimal javascript objects.
# str's objects and fixes the text to write javascript.

#UNICODE USAGE:  Minjson tries hard to accommodate naive usage in a 
#"Do what I mean" manner.  Real applications should handle unicode separately.
# The "right" way to use minjson in an application is to provide minjson a 
# python unicode string for reading and accept a unicode output from minjson's
# writing.  That way, the assumptions for unicode are yours and not minjson's.

# That said, the minjson code has some (optional) unicode handling that you 
# may look at as a model for the unicode handling your application may need.

# Thanks to Patrick Logan for starting the json-py project and making so many
# good test cases.

# Additional thanks to Balazs Ree for replacing the writing module.

# Jim Washington 6 Dec 2006.

# 2006-12-06 Thanks to Koen van de Sande, now handles the case where someone 
#            might want e.g., a literal "\n" in text not a new-line.
# 2005-12-30 writing now traverses the object tree instead of relying on 
#            str() or unicode()
# 2005-10-10 on reading, looks for \\uxxxx and replaces with u'\uxxxx'
# 2005-10-09 now tries hard to make all strings unicode when reading.
# 2005-10-07 got rid of eval() completely, makes object as found by the
#            tokenizer.
# 2005-09-06 imported parsing constants from tokenize; they changed a bit from
#            python2.3 to 2.4
# 2005-08-22 replaced the read sanity code
# 2005-08-21 Search for exploits on eval() yielded more default bad operators.
# 2005-08-18 Added optional code from Koen van de Sande to escape
#            outgoing unicode chars above 128


from re import compile, sub, search, DOTALL
from token import ENDMARKER, NAME, NUMBER, STRING, OP, ERRORTOKEN
from tokenize import tokenize, TokenError, NL

# Last-resort codec: safeRead() decodes a byte string with this encoding when
# no encoding was supplied and decoding with the default encoding failed.
# Usually, utf-8 will work, set this to utf-16 if you dare.
emergencyEncoding = 'utf-8'

class ReadException(Exception):
    """Raised by safeRead()/read() when the text is not acceptable JSON."""

class WriteException(Exception):
    """Raised by the JSON writer when an object cannot be written."""

#################################
#      read JSON object         #
#################################

# C-style /* ... */ comment.  DOTALL lets the body span several lines and the
# non-greedy quantifier stops at the first closing "*/".
slashstarcomment = compile(r'/\*.*?\*/', flags=DOTALL)
# "//" comment, up to and including the newline that ends it.
doubleslashcomment = compile(r'//.*\n')

# A \uXXXX escape: backslash, "u", then exactly four hexadecimal digits.
unichrRE = compile(r"\\u[0-9a-fA-F]{4,4}")

# Code points that must stay backslash-escaped in the text: the replacement
# is still going to be tokenized, and a raw quote, backslash or line break
# would end or corrupt the string literal that contains it.  The string
# handling in JSONReader.readTokens turns these two-character escapes back
# into the real characters.
_UNICHR_KEEP_ESCAPED = {
    0x22: u'\\"',
    0x5c: u'\\\\',
    0x0a: u'\\n',
    0x0d: u'\\r',
    }

def unichrReplace(match):
    r"""Return the replacement text for one \uXXXX escape.

    ``match`` is a match of the \uXXXX pattern (backslash, "u", four hex
    digits).  The result is normally the unicode character with that code
    point, with two exceptions:

    - if the backslash that starts the match is itself escaped (it is
      preceded by an odd number of backslashes, as in ``\\u0041``) the text
      is not an escape at all and is returned unchanged;
    - a quote, backslash, line feed or carriage return is returned in its
      two-character escaped form so that the text can still be tokenized.
    """
    source = match.string
    start = match.start()
    # count the backslashes immediately in front of the match
    preceding = 0
    while preceding < start and source[start - preceding - 1] == '\\':
        preceding += 1
    if preceding % 2:
        # "\\uXXXX": an escaped backslash followed by the plain text uXXXX
        return match.group()
    code = int(match.group()[2:],16)
    try:
        return _UNICHR_KEEP_ESCAPED[code]
    except KeyError:
        return unichr(code)

# (decoded character, escape sequence) pairs applied to string tokens when
# reading.  "\/" is included because JSON allows a solidus to be escaped.
escapeStrs = (('\n',r'\n'),('\b',r'\b'),
    ('\f',r'\f'),('\t',r'\t'),('\r',r'\r'), ('"',r'\"'),
    ('/',r'\/')
    )

# Marker objects pushed on the JSONReader parse stack.  They derive from
# object because __slots__ is ignored on classic (old-style) classes.
class DictToken(object):
    """Marks where a '{' was read."""
    __slots__=[]
class ListToken(object):
    """Marks where a '[' was read."""
    __slots__=[]
class ColonToken(object):
    """Marks where a ':' was read."""
    __slots__=[]
class CommaToken(object):
    """Marks where a ',' was read."""
    __slots__=[]

class JSONReader(object):
    """raise SyntaxError if it is not JSON, and make the object available

    The constructor hands ``data`` to the tokenizer with readTokens() as the
    token callback.  Values and marker tokens are collected on the
    ``objects`` stack; a closing bracket or brace folds everything back to
    the matching marker into a list or dict.  output() returns the single
    finished value.  Every malformed input is reported as SyntaxError.
    """
    def __init__(self,data):
        self.stop = False
        #make an iterator of data so that next() works in tokenize.
        self._data = iter([data])
        # most recent operator; '-' means "negate the number that follows"
        self.lastOp = None
        # parse stack of finished values and DictToken/ListToken/... markers
        self.objects = []
        self.tokenize()

    def tokenize(self):
        """Tokenize the data; a TokenError is reported as SyntaxError."""
        try:
            tokenize(self._data.next,self.readTokens)
        except TokenError:
            raise SyntaxError

    def _popObject(self):
        """Pop the top of the parse stack; SyntaxError if it is empty."""
        try:
            return self.objects.pop()
        except IndexError:
            raise SyntaxError

    def _isMarker(self, item):
        """Tell whether item is a structural marker rather than a value."""
        return isinstance(item,
            (DictToken, ListToken, ColonToken, CommaToken))

    def resolveList(self):
        """Replace everything back to the nearest ListToken with a list.

        Walking backwards, the stack must read value, comma, value, ...
        down to the ListToken; anything else (misplaced or trailing comma,
        two adjacent values, a stray marker, no ListToken at all) raises
        SyntaxError.
        """
        theList = []
        #we're going backwards: a value must come first, then a comma, ...
        needValue = True
        while True:
            item = self._popObject()
            if isinstance(item,ListToken):
                break
            isComma = isinstance(item,CommaToken)
            if isComma == needValue:
                #a comma where a value belongs, or two values in a row
                raise SyntaxError
            if not isComma:
                if self._isMarker(item):
                    raise SyntaxError
                theList.append(item)
            needValue = isComma
        if needValue and theList:
            #the list started with a comma
            raise SyntaxError
        theList.reverse()
        self.objects.append(theList)

    def _popPair(self):
        """Pop value, colon and key (in that order); return (key, value)."""
        value = self._popObject()
        if self._isMarker(value):
            raise SyntaxError
        if not isinstance(self._popObject(), ColonToken):
            raise SyntaxError
        key = self._popObject()
        if not isinstance(key,basestring):
            raise SyntaxError
        return key, value

    def resolveDict(self):
        """Replace everything back to the nearest DictToken with a dict.

        Walking backwards, the stack must read value, colon, key, then
        either a comma followed by another such triple or the DictToken.
        Keys must be strings.  Anything else raises SyntaxError.  When a
        key is repeated, the last occurrence in the text wins.
        """
        if not self.objects:
            raise SyntaxError
        #check for empty dict
        if isinstance(self.objects[-1], DictToken):
            self.objects[-1] = {}
            return
        #not empty; must have at least three values
        pairs = [self._popPair()]
        while True:
            separator = self._popObject()
            if isinstance(separator,DictToken):
                break
            if not isinstance(separator,CommaToken):
                raise SyntaxError
            pairs.append(self._popPair())
        #pairs were collected back to front; restore the order of the text
        #so that a later duplicate key overwrites an earlier one
        pairs.reverse()
        theDict = {}
        for key, value in pairs:
            theDict[key] = value
        self.objects.append(theDict)

    def readTokens(self, type, token, start, end, line):
        """Token callback: fold one token into the parse stack.

        ``start`` and ``end`` are the (row, column) pairs supplied by the
        tokenizer.  Raises SyntaxError for any token that is not part of a
        JSON text.
        """
        # UPPERCASE consts from tokens.py or tokenize.py
        pendingMinus = self.lastOp == '-'
        if pendingMinus and type not in (NUMBER, NL):
            #a minus sign is only meaningful in front of a number
            raise SyntaxError
        if type == OP:
            if token not in ('[', '{', '}', ']', ',', ':', '-'):
                raise SyntaxError
            self.lastOp = token
            if token == '[':
                self.objects.append(ListToken())
            elif token == '{':
                self.objects.append(DictToken())
            elif token == ']':
                self.resolveList()
            elif token == '}':
                self.resolveDict()
            elif token == ':':
                self.objects.append(ColonToken())
            elif token == ',':
                self.objects.append(CommaToken())
        elif type == STRING:
            #drop the quotes, then unescape; splitting on the escaped
            #backslash first keeps "\\n" from being read as a newline
            tok = token[1:-1]
            parts = tok.split("\\\\")
            for k in escapeStrs:
                if k[1] in tok:
                    parts = [part.replace(k[1],k[0]) for part in parts]
            self.objects.append("\\".join(parts))
        elif type == NUMBER:
            if pendingMinus:
                factor = -1
            else:
                factor = 1
            #the sign has been used up
            self.lastOp = None
            try:
                number = int(token)
            except ValueError:
                try:
                    number = float(token)
                except ValueError:
                    #a number only python understands (hex, long, complex)
                    raise SyntaxError
            self.objects.append(factor * number)
        elif type == NAME:
            try:
                self.objects.append({'true':True,
                    'false':False,'null':None}[token])
            except KeyError:
                raise SyntaxError
        elif type == ENDMARKER:
            pass
        elif type == NL:
            pass
        elif type == ERRORTOKEN:
            if end[1] != len(line):
                raise SyntaxError
            #else it's a char at the end of the line.  (mostly) harmless.
        else:
            raise SyntaxError

    def output(self):
        """Return the parsed value.

        Raises SyntaxError unless the text produced exactly one complete
        value.  (An explicit test is used instead of ``assert`` so the
        check is still made when python runs with -O.)
        """
        if len(self.objects) != 1:
            raise SyntaxError
        result = self.objects[0]
        if self._isMarker(result):
            raise SyntaxError
        return result

# One pass over the text that recognises string literals as well as comments,
# so that "//" or "/*" inside a string (a URL, for instance) is left alone.
# Alternatives, in order: double-quoted string, single-quoted string,
# /* ... */ comment, // comment with the newline that ends it (if any).
_commentOrString = compile(
    r'"[^"\\]*(?:\\.[^"\\]*)*"'
    r"|'[^'\\]*(?:\\.[^'\\]*)*'"
    r'|/\*.*?\*/'
    r'|//[^\n]*\n?',
    DOTALL)

def _dropComment(match):
    """Return '' for a comment match and the unchanged text for a string."""
    text = match.group()
    if text.startswith('/'):
        return ''
    return text

def safeRead(aString, encoding=None):
    """read the js, first sanitizing a bit and removing any c-style comments
    If the input is a unicode string, great.  That's preferred.  If the input 
    is a byte string, strings in the object will be produced as unicode anyway.

    ``encoding`` is used to decode a byte string; when it is omitted the
    default encoding is tried first and then ``emergencyEncoding``.  A
    supplied encoding that does not fit raises UnicodeDecodeError.

    Returns the python object described by the text.  Raises ReadException
    if the text is not acceptable JSON.
    """
    # get rid of trailing null. Konqueror appends this.
    aString = aString.rstrip(chr(0))
    # strip leading and trailing whitespace
    aString = aString.strip()
    # zap /* ... */ and // comments, but not comment look-alikes in strings
    aString = _commentOrString.sub(_dropComment, aString)
    # removing a comment can expose leading or trailing whitespace again.
    # A leading newline in particular must not reach the tokenizer.
    aString = aString.strip()
    # detect and handle \\u unicode characters. Note: This has the side effect
    # of converting the entire string to unicode. This is probably OK.
    if unichrRE.search(aString):
        aString = unichrRE.sub(unichrReplace, aString)
    #if it's already unicode, we won't try to decode it
    if isinstance(aString, unicode):
        s = aString
    elif encoding:
        # note: no "try" here.  the encoding provided must work for the
        # incoming byte string.  UnicodeDecode error will be raised
        # in that case.  Often, it will be best not to provide the encoding
        # and allow the default
        s = unicode(aString, encoding)
    else:
        # let's try to decode to unicode in system default encoding
        try:
            s = unicode(aString)
        except UnicodeDecodeError:
            # last choice: handle as emergencyEncoding
            s = unicode(aString, emergencyEncoding)
    # parse and get the object.
    try:
        data = JSONReader(s).output()
    except (SyntaxError, IndexError, ValueError):
        # IndexError/ValueError: a parser failure that was not reported as
        # SyntaxError is still unacceptable input, not a caller error.
        raise ReadException('Unacceptable JSON expression: %s' % aString)
    return data

# Public alias: read() and safeRead() are the same function.
read = safeRead

#################################
#   write object as JSON        #
#################################

import re, codecs
from cStringIO import StringIO

### Codec error handler

def jsonreplace_handler(exc):
    '''Error handler for json

    If encoding fails, \\uxxxx must be emitted. This
    is similar to the "backslashreplace" handler, only
    that we never emit \\xnn or \\Uxxxxxxxx since these
    are not legal according to the JSON syntax specs.

    Every character in the failing range exc.start:exc.end is replaced.
    A code point above U+FFFF is written as a UTF-16 surrogate pair,
    because a JSON escape has exactly four hex digits.

    Returns (replacement text, position at which encoding resumes).
    Any exception other than UnicodeEncodeError is re-raised.
    '''
    if not isinstance(exc, UnicodeEncodeError):
        raise exc
    pieces = []
    for char in exc.object[exc.start:exc.end]:
        code = ord(char)
        if code > 0xFFFF:
            # split into high and low surrogate
            code -= 0x10000
            pieces.append(u'\\u%04x\\u%04x' % (0xD800 + (code >> 10),
                                               0xDC00 + (code & 0x3FF)))
        else:
            pieces.append(u'\\u%04x' % code)
    return u''.join(pieces), exc.end

# register the error handler
codecs.register_error('jsonreplace', jsonreplace_handler)

### Writer

def write(input, encoding='utf-8', outputEncoding=None):
    """Return ``input`` written as JSON text.

    ``encoding`` is handed to JsonWriter as the encoding of byte strings
    found in ``input``; ``outputEncoding`` as the encoding of the result.
    """
    jsonWriter = JsonWriter(output_encoding=outputEncoding,
                            input_encoding=encoding)
    jsonWriter.write(input)
    result = jsonWriter.getvalue()
    return result

# Characters that may not appear unescaped inside a JSON string: the quote,
# the backslash and every control character below 0x20.
re_strmangle = re.compile(r'["\\\x00-\x1f]')

# The characters that have a short two-character escape.
_strmangle_short = {
    '"': '\\"',
    '\b': '\\b',
    '\f': '\\f',
    '\n': '\\n',
    '\r': '\\r',
    '\t': '\\t',
    '\\': '\\\\',
    }

def func_strmangle(match):
    """Return the JSON escape for the single character in ``match``.

    The short form is used where JSON defines one; any other control
    character is written as \\u00xx.
    """
    char = match.group(0)
    try:
        return _strmangle_short[char]
    except KeyError:
        return '\\u%04x' % ord(char)

def strmangle(text):
    """Return ``text`` with every character escaped that JSON requires."""
    return re_strmangle.sub(func_strmangle, text)

class JsonStream(object):
    """Minimal in-memory stream: collects written text, joins it on demand."""

    def __init__(self):
        # fragments, in the order they were written
        self.buf = list()

    def write(self, text):
        """Add ``text`` to the end of the collected output."""
        self.buf += [text]

    def getvalue(self):
        """Return everything written so far as one string."""
        glue = ''
        return glue.join(self.buf)

class JsonWriter(object):
    """Write python objects to a stream as JSON text.

    Lists and tuples become arrays, dicts become objects, True/False/None
    become true/false/null, strings become quoted and escaped strings and
    anything else is written as its str() (floats as their repr()).
    """

    def __init__(self, stream=None, input_encoding='utf-8', output_encoding=None):
        '''
        - stream is optional, if specified must also give output_encoding
        - The input strings can be unicode or in input_encoding
        - output_encoding is optional, if omitted, result will be unicode

        Raises WriteException if a stream is given without output_encoding.
        '''
        if stream is not None:
            if output_encoding is None:
                raise WriteException('If a stream is given, output encoding must also be provided')
        else:
            stream = JsonStream()
        self.stream = stream
        self.input_encoding = input_encoding
        self.output_encoding = output_encoding

    def write(self, obj):
        """Write ``obj`` (recursively) to the stream.

        Raises WriteException if an object cannot be converted to text or
        if a dict key is a list, tuple or dict.
        """
        if isinstance(obj, (list, tuple)):
            self.stream.write('[')
            first = True
            for elem in obj:
                if first:
                    first = False
                else:
                    self.stream.write(',')
                self.write(elem)
            self.stream.write(']')
        elif isinstance(obj, dict):
            self.stream.write('{')
            first = True
            for key in obj:
                if first:
                    first = False
                else:
                    self.stream.write(',')
                self._writeKey(key)
                self.stream.write(':')
                self.write(obj[key])
            self.stream.write('}')
        elif obj is True or obj is False or obj is None \
                or isinstance(obj, float):
            self.stream.write(self._scalarText(obj))
        elif isinstance(obj, basestring):
            self._writeString(obj)
        else:
            self.stream.write(self._scalarText(obj))

    def _scalarText(self, obj):
        """Return the unquoted JSON text for a value that is not a string."""
        if obj is True:
            return 'true'
        if obj is False:
            return 'false'
        if obj is None:
            return 'null'
        if isinstance(obj, float):
            # str() of a float keeps only 12 significant digits on python 2;
            # repr() gives text that reads back as the same number
            return repr(obj)
        try:
            return str(obj)
        except Exception:
            # sys.exc_info() instead of "except ... as" keeps this valid on
            # every python version
            import sys
            exc = sys.exc_info()[1]
            raise WriteException('Cannot write object (%s: %s)' % (exc.__class__, exc))

    def _writeKey(self, key):
        """Write a dict key; JSON object keys are always strings.

        A key that is not a string is written as the quoted form of the
        text it would have as a value (1 -> "1", None -> "null").
        """
        if isinstance(key, (list, tuple, dict)):
            raise WriteException('Cannot write object key (%r)' % (key,))
        if not isinstance(key, basestring):
            key = self._scalarText(key)
            try:
                # text of numbers and constants is plain ascii; do not let
                # input_encoding reinterpret it
                key = unicode(key, 'ascii')
            except UnicodeDecodeError:
                # unusual object: decode like any other byte string
                pass
        self._writeString(key)

    def _writeString(self, obj):
        """Write a string value: decode, escape, encode and quote it."""
        # convert to unicode first
        if not isinstance(obj, unicode):
            try:
                obj = unicode(obj, self.input_encoding)
            except (UnicodeDecodeError, UnicodeTranslateError):
                obj = unicode(obj, 'utf-8', 'replace')
        # do the mangling
        obj = strmangle(obj)
        # make the encoding
        if self.output_encoding is not None:
            obj = obj.encode(self.output_encoding, 'jsonreplace')
        self.stream.write('"')
        self.stream.write(obj)
        self.stream.write('"')

    def getvalue(self):
        """Return what the stream has collected (the stream's getvalue())."""
        return self.stream.getvalue()


