File: jsonl.py

package info (click to toggle)
python-django 3%3A3.2.19-1%2Bdeb12u2
  • links: PTS, VCS
  • area: main
  • in suites: bookworm-proposed-updates
  • size: 56,696 kB
  • sloc: python: 264,418; javascript: 18,362; xml: 193; makefile: 178; sh: 43
file content (57 lines) | stat: -rw-r--r-- 1,845 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
"""
Serialize data to/from JSON Lines
"""

import json

from django.core.serializers.base import DeserializationError
from django.core.serializers.json import DjangoJSONEncoder
from django.core.serializers.python import (
    Deserializer as PythonDeserializer, Serializer as PythonSerializer,
)


class Serializer(PythonSerializer):
    """Convert a queryset to JSON Lines."""
    internal_use_only = False

    def _init_options(self):
        self._current = None
        self.json_kwargs = self.options.copy()
        self.json_kwargs.pop('stream', None)
        self.json_kwargs.pop('fields', None)
        self.json_kwargs.pop('indent', None)
        self.json_kwargs['separators'] = (',', ': ')
        self.json_kwargs.setdefault('cls', DjangoJSONEncoder)
        self.json_kwargs.setdefault('ensure_ascii', False)

    def start_serialization(self):
        self._init_options()

    def end_object(self, obj):
        # self._current has the field data
        json.dump(self.get_dump_object(obj), self.stream, **self.json_kwargs)
        self.stream.write("\n")
        self._current = None

    def getvalue(self):
        # Grandparent super
        return super(PythonSerializer, self).getvalue()


def Deserializer(stream_or_string, **options):
    """Deserialize a stream or string of JSON data."""
    if isinstance(stream_or_string, bytes):
        stream_or_string = stream_or_string.decode()
    if isinstance(stream_or_string, (bytes, str)):
        stream_or_string = stream_or_string.split("\n")

    for line in stream_or_string:
        if not line.strip():
            continue
        try:
            yield from PythonDeserializer([json.loads(line)], **options)
        except (GeneratorExit, DeserializationError):
            raise
        except Exception as exc:
            raise DeserializationError() from exc