File: logic.py

package info (click to toggle)
python-dict2xml 1.7.6-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 184 kB
  • sloc: python: 768; xml: 108; sh: 36; makefile: 7
file content (296 lines) | stat: -rw-r--r-- 10,084 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
import collections
import collections.abc
import re

start_ranges = "|".join(
    "[{0}]".format(r)
    for r in [
        "\xC0-\xD6",
        "\xD8-\xF6",
        "\xF8-\u02FF",
        "\u0370-\u037D",
        "\u037F-\u1FFF",
        "\u200C-\u200D",
        "\u2070-\u218F",
        "\u2C00-\u2FEF",
        "\u3001-\uD7FF",
        "\uF900-\uFDCF",
        "\uFDF0-\uFFFD",
    ]
)

NameStartChar = re.compile(r"(:|[A-Z]|_|[a-z]|{0})".format(start_ranges))
NameChar = re.compile(r"(\-|\.|[0-9]|\xB7|[\u0300-\u036F]|[\u203F-\u2040])")

########################
###   NODE
########################


class DataSorter:
    """
    Used to sort a map of data depending on it's type
    """

    def keys_from(self, data):
        sorted_data = data
        if not isinstance(data, collections.OrderedDict):
            sorted_data = sorted(data)
        return sorted_data

    class always:
        def keys_from(self, data):
            return sorted(data)

    class never:
        def keys_from(self, data):
            return data


class Node(object):
    """
    Represents each tag in the tree

    Each node has _either_ a single value or one or more children
    If it has a value:
        The serialized result is <%(tag)s>%(value)s</%(tag)s>

    If it has children:
        The serialized result is
            <%(wrap)s>
                %(children)s
            </%(wrap)s>

    Which one it is depends on the implementation of self.convert
    """

    # A mapping of characters to treat as escapable entities and their replacements
    entities = [("&", "&amp;"), ("<", "&lt;"), (">", "&gt;")]

    def __init__(
        self,
        wrap="",
        tag="",
        data=None,
        iterables_repeat_wrap=True,
        closed_tags_for=None,
        data_sorter=None,
    ):
        self.tag = self.sanitize_element(tag)
        self.wrap = self.sanitize_element(wrap)
        self.data = data
        self.type = self.determine_type()
        self.data_sorter = data_sorter if data_sorter is not None else DataSorter()
        self.closed_tags_for = closed_tags_for
        self.iterables_repeat_wrap = iterables_repeat_wrap

        if self.type == "flat" and isinstance(self.data, str):
            # Make sure we deal with entities
            for entity, replacement in self.entities:
                self.data = self.data.replace(entity, replacement)

    def serialize(self, indenter):
        """Returns the Node serialized as an xml string"""
        # Determine the start and end of this node
        wrap = self.wrap
        end, start = "", ""
        if wrap:
            end = "</{0}>".format(wrap)
            start = "<{0}>".format(wrap)

        if self.closed_tags_for and self.data in self.closed_tags_for:
            return "<{0}/>".format(self.wrap)

        # Convert the data attached in this node into a value and children
        value, children = self.convert()

        # Determine the content of the node (essentially the children as a string value)
        content = ""
        if children:
            if self.type != "iterable":
                # Non-iterable wraps all it's children in the same tag
                content = indenter((c.serialize(indenter) for c in children), wrap)
            else:
                if self.iterables_repeat_wrap:
                    # Iterables repeat the wrap for each child
                    result = []
                    for c in children:
                        content = c.serialize(indenter)
                        if c.type == "flat":
                            # Child with value, it already is surrounded by the tag
                            result.append(content)
                        else:
                            # Child with children of it's own, they need to be wrapped by start and end
                            content = indenter([content], True)
                            result.append("".join((start, content, end)))

                    # We already have what we want, return the indented result
                    return indenter(result, False)
                else:
                    result = []
                    for c in children:
                        result.append(c.serialize(indenter))
                    return "".join([start, indenter(result, True), end])

        # If here, either:
        #  * Have a value
        #  * Or this node is not an iterable
        return "".join((start, value, content, end))

    def determine_type(self):
        """
        Return the type of the data on this node as an identifying string

        * Iterable : Supports "for item in data"
        * Mapping : Supports "for key in data: value = data[key]"
        * flat : A string or something that isn't iterable or a mapping
        """
        data = self.data
        if isinstance(data, str):
            return "flat"
        elif isinstance(data, collections.abc.Mapping):
            return "mapping"
        elif isinstance(data, collections.abc.Iterable):
            return "iterable"
        else:
            return "flat"

    def convert(self):
        """
        Convert data on this node into a (value, children) tuple depending on the type of the data
        If the type is :
            * flat : Use self.tag to surround the value. <tag>value</tag>
            * mapping : Return a list of tags where the key for each child is the wrap for that node
            * iterable : Return a list of Nodes where self.wrap is the tag for that node
        """
        val = ""
        typ = self.type
        data = self.data
        children = []

        if typ == "mapping":
            sorted_keys = self.data_sorter.keys_from(data)

            for key in sorted_keys:
                item = data[key]
                children.append(
                    Node(
                        key,
                        "",
                        item,
                        iterables_repeat_wrap=self.iterables_repeat_wrap,
                        closed_tags_for=self.closed_tags_for,
                        data_sorter=self.data_sorter,
                    )
                )

        elif typ == "iterable":
            for item in data:
                children.append(
                    Node(
                        "",
                        self.wrap,
                        item,
                        iterables_repeat_wrap=self.iterables_repeat_wrap,
                        closed_tags_for=self.closed_tags_for,
                        data_sorter=self.data_sorter,
                    )
                )

        else:
            val = str(data)
            if self.tag:
                val = "<{0}>{1}</{2}>".format(self.tag, val, self.tag)

        return val, children

    @staticmethod
    def sanitize_element(wrap):
        """
        Convert `wrap` into a valid tag name applying the XML Naming Rules.

            * Names can contain letters, numbers, and other characters
            * Names cannot start with a number or punctuation character
            * Names cannot start with the letters xml (or XML, or Xml, etc)
            * Names cannot contain spaces
            * Any name can be used, no words are reserved.

        :ref: http://www.w3.org/TR/REC-xml/#NT-NameChar
        """
        if wrap and isinstance(wrap, str):
            if wrap.lower().startswith("xml"):
                wrap = "_" + wrap
            return "".join(
                ["_" if not NameStartChar.match(wrap) else ""]
                + ["_" if not (NameStartChar.match(c) or NameChar.match(c)) else c for c in wrap]
            )
        else:
            return wrap


########################
###   CONVERTER
########################


class Converter(object):
    """Logic for creating a Node tree and serialising that tree into a string"""

    def __init__(self, wrap=None, indent="  ", newlines=True):
        """
        wrap: The tag that the everything else will be contained within
        indent: The string that is multiplied at the start of each new line, to represent each level of nesting
        newlines: A boolean specifying whether we want each tag on a new line.

        Note that indent only works if newlines is True
        """
        self.wrap = wrap
        self.indent = indent
        self.newlines = newlines

    def _make_indenter(self):
        """Returns a function that given a list of strings, will return that list as a single, indented, string"""
        indent = self.indent
        newlines = self.newlines

        if not newlines:
            # No newlines, don't care about indentation
            ret = lambda nodes, wrapped: "".join(nodes)
        else:
            if not indent:
                indent = ""

            def eachline(nodes):
                """Yield each line in each node"""
                for node in nodes:
                    for line in node.split("\n"):
                        yield line

            def ret(nodes, wrapped):
                """
                Indent nodes depending on value of wrapped and indent
                If not wrapped, then don't indent
                Otherwise,
                    Seperate each child by a newline
                    and indent each line in the child by one indent unit
                """
                if wrapped:
                    seperator = "\n{0}".format(indent)
                    surrounding = "\n{0}{{0}}\n".format(indent)
                else:
                    seperator = "\n"
                    surrounding = "{0}"
                return surrounding.format(seperator.join(eachline(nodes)))

        return ret

    def build(self, data, iterables_repeat_wrap=True, closed_tags_for=None, data_sorter=None):
        """Create a Node tree from the data and return it as a serialized xml string"""
        indenter = self._make_indenter()
        return Node(
            wrap=self.wrap,
            data=data,
            iterables_repeat_wrap=iterables_repeat_wrap,
            closed_tags_for=closed_tags_for,
            data_sorter=data_sorter,
        ).serialize(indenter)