File: xml_template.py

package info (click to toggle)
python-kajiki 1.0.2-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 700 kB
  • sloc: python: 4,098; makefile: 115
file content (1021 lines) | stat: -rw-r--r-- 39,186 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
import abc
import collections
import functools
import html
import io
import re
from codecs import open
from sys import version_info
from xml import sax
from xml.dom import minidom as dom
from xml.sax import SAXParseException

from kajiki import ir, template
from kajiki.doctype import DocumentTypeDeclaration, extract_dtd
from kajiki.html_utils import HTML_CDATA_TAGS, HTML_OPTIONAL_END_TAGS, HTML_REQUIRED_END_TAGS
from kajiki.markup_template import QDIRECTIVES, QDIRECTIVES_DICT

# Shared minidom DOM implementation; ``_Parser.startDTD`` uses it to create
# the document-type node that is attached to the parsed document.
# NOTE(review): the " " feature string presumably requests no particular DOM
# features -- confirm against xml.dom.minidom.getDOMImplementation.
impl = dom.getDOMImplementation(" ")


def XMLTemplate(  # noqa: N802
    source=None,
    filename=None,
    mode=None,
    is_fragment=False,  # noqa: FBT002
    encoding="utf-8",
    autoblocks=None,
    cdata_scripts=True,  # noqa: FBT002
    strip_text=False,  # noqa: FBT002
    base_globals=None,
):
    """Build a template class from the XML source of a Kajiki template.

    The work is done in four stages:

    1. :class:`._Parser` turns the source text into a DOM document.
    2. :class:`._DomTransformer` rewrites that DOM (directive expansion,
       text node handling, optional text stripping).
    3. :class:`._Compiler` converts the DOM into the *Intermediate
       Representation* tree.
    4. :func:`kajiki.template.from_ir` turns the IR into the template
       class, which is returned.

    :param source: template text; when ``None`` it is read from *filename*.
    :param filename: path to read when *source* is ``None``; also handed to
        the parser and compiler as the template name.  Defaults to
        ``"<string>"`` when not given.
    :param mode: rendering mode forwarded to the compiler.
    :param is_fragment: forwarded to the compiler.
    :param encoding: encoding used only when reading *filename*.
    :param autoblocks: forwarded to the compiler.
    :param cdata_scripts: forwarded to the compiler.
    :param strip_text: forwarded to the DOM transformer.
    :param base_globals: forwarded to :func:`kajiki.template.from_ir`.
    """
    if source is None:
        # No inline source: load the template text from disk.
        with open(filename, encoding=encoding) as template_file:
            source = template_file.read()
    if filename is None:
        filename = "<string>"

    parsed_doc = _Parser(filename, source).parse()
    transformer = _DomTransformer(parsed_doc, strip_text=strip_text)
    expanded_doc = transformer.transform()
    compiler = _Compiler(
        filename,
        expanded_doc,
        mode=mode,
        is_fragment=is_fragment,
        autoblocks=autoblocks,
        cdata_scripts=cdata_scripts,
    )
    template_ir = compiler.compile()
    return template.from_ir(template_ir, base_globals=base_globals)


def annotate(gen):
    """Decorate a node-compiling generator method so its output is annotated.

    The returned wrapper iterates over ``gen(self, node, ...)`` and, for every
    yielded item, calls ``self._anno(node, item)`` before passing the item on
    to the caller.

    ``functools.wraps`` keeps the wrapped method's ``__name__`` and
    ``__doc__`` so that the decorated methods stay introspectable.

    :param gen: generator method taking ``(self, node, *args, **kwargs)``.
    :returns: a generator function with the same call signature.
    """

    @functools.wraps(gen)
    def inner(self, node, *args, **kwargs):
        for x in gen(self, node, *args, **kwargs):
            self._anno(node, x)
            yield x

    return inner


class _Compiler:
    """Compiles a DOM tree into IR :class:`kajiki.ir.TemplateNode`.

    Intermediate Representation is a tree of nodes that represent
    Python Code that should be generated to execute the template.
    """

    def __init__(
        self,
        filename,
        doc,
        mode=None,
        is_fragment=False,  # noqa: FBT002
        autoblocks=None,
        cdata_scripts=True,  # noqa: FBT002
    ):
        """Set up the compiler state for one document.

        :param filename: template name stored on the generated IR nodes.
        :param doc: DOM document to compile; its ``_dtd`` attribute is read.
        :param mode: rendering mode; when falsy it is taken from the
            recognised DTD, falling back to ``"xml"``.
        :param is_fragment: stored as-is; checked when deciding whether a
            DOCTYPE is emitted.
        :param autoblocks: tag names treated as automatic blocks (``None``
            means no tag).
        :param cdata_scripts: stored as-is; enables the special handling of
            the tags listed in ``HTML_CDATA_TAGS``.
        """
        # Inputs.
        self.filename = filename
        self.doc = doc
        self.is_fragment = is_fragment
        self.cdata_scripts = cdata_scripts
        self.autoblocks = autoblocks if autoblocks else []

        # Registries filled while compiling.
        self.functions = collections.defaultdict(list)
        self.functions["__main__()"] = []
        self.function_lnos = {}
        self.mod_py = []

        # Flags toggled by the directive compilers.
        self.in_def = False
        self.is_child = False

        # An explicit *mode* wins; otherwise use the mode of the matched DTD,
        # and default to XML when the DTD is unknown or absent.
        matched_dtd = DocumentTypeDeclaration.matching(self.doc._dtd)  # noqa: SLF001
        self._dtd = matched_dtd
        if mode:
            chosen_mode = mode
        elif matched_dtd:
            chosen_mode = matched_dtd.rendering_mode
        else:
            chosen_mode = "xml"
        self.mode = chosen_mode

    def compile(self):
        """Compile the whole document into a :class:`kajiki.ir.TemplateNode`.

        The document must have exactly one non-comment top-level node; that
        node is compiled into the body of the ``__main__()`` function.  Every
        entry of the functions registry becomes a :class:`kajiki.ir.DefNode`,
        and the module level code collected in ``self.mod_py`` is attached to
        the template node.

        Unless the template is a fragment or extends another template, a
        DOCTYPE text node is put in front of the main body: the DTD stored on
        the document when there is one, otherwise ``<!DOCTYPE html>`` in
        ``html5`` mode.

        .. note::
            This mutates the compiler registries, so it is meant to be
            called once per compiler instance.

        :raises XMLTemplateCompileError: when the document does not have a
            single root node.
        """
        root_candidates = [child for child in self.doc.childNodes if not isinstance(child, dom.Comment)]
        if len(root_candidates) != 1:
            msg = "expected a single root node in document"
            raise XMLTemplateCompileError(msg, self.doc, self.filename, 0)

        (root,) = root_candidates
        main_body = list(self._compile_node(root))

        # Fragments and child templates never get a doctype of their own.
        if not (self.is_fragment or self.is_child):
            doctype_text = self.doc._dtd  # noqa: SLF001
            if not doctype_text and self.mode == "html5":
                doctype_text = "<!DOCTYPE html>"
            if doctype_text:
                doctype_node = ir.TextNode(doctype_text.strip() + "\n")
                doctype_node.filename = self.filename
                doctype_node.lineno = 1
                main_body.insert(0, doctype_node)
        self.functions["__main__()"] = main_body

        function_defs = []
        for declaration, statements in self.functions.items():
            def_node = ir.DefNode(declaration, *statements)
            def_node.lineno = self.function_lnos.get(declaration)
            function_defs.append(def_node)

        template_node = ir.TemplateNode(self.mod_py, function_defs)
        template_node.filename = self.filename
        template_node.lineno = 0
        return template_node

    def _anno(self, dom_node, ir_node):
        """Stamp *ir_node* with this template's filename and a line number.

        Nodes that already carry a truthy ``lineno`` are left untouched;
        otherwise the line number is copied from *dom_node*.
        """
        if not ir_node.lineno:
            ir_node.filename = self.filename
            ir_node.lineno = dom_node.lineno

    def _is_autoblock(self, node):
        """Tell whether *node* must be compiled as an automatic block.

        A tag qualifies when its name is listed in ``self.autoblocks``.  A
        ``py:autoblock`` attribute on the tag can opt out: ``"false"``
        (case-insensitive) disables the behaviour and the attribute is
        removed from the node; ``"true"`` keeps it enabled.

        :raises ValueError: when ``py:autoblock`` holds anything other than
            a true/false constant.
        """
        if node.tagName not in self.autoblocks:
            return False
        if not node.hasAttribute("py:autoblock"):
            return True

        flag = node.getAttribute("py:autoblock").lower()
        if flag == "true":
            return True
        if flag != "false":
            msg = "py:autoblock is evaluated at compile time and only accepts True/False constants"
            raise ValueError(msg)

        # Drop the attribute so it does not show up in the rendered tag.
        node.removeAttribute("py:autoblock")
        return False

    def _compile_node(self, node):
        """Dispatch a DOM node to the compile method that handles it.

        Comments, text nodes and processing instructions go to their
        dedicated methods.  Elements recognised by :meth:`._is_autoblock`
        get a ``name`` attribute equal to their tag name and are compiled as
        blocks.  Elements whose tag starts with ``py:`` are sent to
        ``_compile_<suffix>`` when such a method exists; everything else is
        compiled by :meth:`._compile_xml`.

        :returns: the iterable produced by the selected compile method.
        """
        simple_handlers = (
            (dom.Comment, self._compile_comment),
            (dom.Text, self._compile_text),
            (dom.ProcessingInstruction, self._compile_pi),
        )
        for dom_type, handler in simple_handlers:
            if isinstance(node, dom_type):
                return handler(node)

        if self._is_autoblock(node):
            # The block is named after the tag itself.
            node.setAttribute("name", node.tagName)
            return self._compile_block(node)

        tag = node.tagName
        if not tag.startswith("py:"):
            return self._compile_xml(node)

        # Directive: look the handler up by the part after the last colon.
        directive = tag.split(":")[-1]
        handler = getattr(self, f"_compile_{directive}", self._compile_xml)
        return handler(node)

    @annotate
    def _compile_xml(self, node):
        """Compile a plain XML element into IR nodes.

        Yields, in order: the opening ``<tag`` text, one
        :class:`.ir.AttrNode` per attribute (sorted by attribute name), an
        optional :class:`.ir.AttrsNode` for ``py:attrs``, and then the
        element content and closing markup.

        Directives handled here because they only affect the element itself:

        * ``py:strip`` -- turned into a *guard* expression that is attached
          to the tag and attribute nodes (but not to the content).
        * ``py:attrs`` -- emitted as an :class:`.ir.AttrsNode`.
        * ``py:content`` -- replaces the children with an
          :class:`.ir.ExprNode`.

        Attribute values go through :class:`._TextCompiler` so that
        ``${expr}`` expressions work inside attributes too.

        When there is no ``py:content`` and the element has children, the
        children are compiled; children of the tags in ``HTML_CDATA_TAGS``
        get special treatment when ``cdata_scripts`` is enabled.
        """
        content = attrs = guard = None
        if node.hasAttribute("py:strip"):
            # NOTE(review): the guard is presumably the condition under which
            # the IR emits the tag text -- confirm against kajiki.ir.
            guard = node.getAttribute("py:strip")
            if guard == "":  # py:strip="" means yes, do strip the tag  # noqa: SIM108
                guard = "False"
            else:
                guard = f"not ({guard})"
            node.removeAttribute("py:strip")
        yield ir.TextNode(f"<{node.tagName}", guard)
        for k, v in sorted(node.attributes.items()):
            # Every attribute value is compiled, including py:content and
            # py:attrs, whose compiled form is then discarded below.
            tc = _TextCompiler(
                self.filename,
                v,
                node.lineno,
                ir.TextNode,
                in_html_attr=True,
                compiler_instance=self,
            )
            v = list(tc)  # noqa: PLW2901
            if k == "py:content":
                content = node.getAttribute("py:content")
                continue
            elif k == "py:attrs":
                attrs = node.getAttribute("py:attrs")
                continue
            yield ir.AttrNode(k, v, guard, self.mode)
        if attrs:
            yield ir.AttrsNode(attrs, guard, self.mode)
        if content:
            # py:content: the expression result replaces any child nodes.
            yield ir.TextNode(">", guard)
            yield ir.ExprNode(content)
            yield ir.TextNode(f"</{node.tagName}>", guard)
        elif node.childNodes:
            yield ir.TextNode(">", guard)
            if self.cdata_scripts and node.tagName in HTML_CDATA_TAGS:
                # Special behaviour for <script>, <style> tags:
                if self.mode == "xml":  # Start escaping
                    yield ir.TextNode("/*<![CDATA[*/")
                # Need to unescape the contents of these tags
                for child in node.childNodes:
                    # CDATA for scripts and styles are automatically managed.
                    if getattr(child, "_cdata", False):
                        continue
                    assert isinstance(child, dom.Text)  # noqa: S101
                    for x in self._compile_text(child):
                        if child.escaped:  # If user declared CDATA no escaping happened.
                            x.text = html.unescape(x.text)
                        yield x
                if self.mode == "xml":  # Finish escaping
                    yield ir.TextNode("/*]]>*/")
            else:
                for cn in node.childNodes:
                    # Keep CDATA sections around if declared by user
                    if getattr(cn, "_cdata", False):
                        yield ir.TextNode(cn.data)
                        continue
                    for x in self._compile_node(cn):
                        yield x
            # In the html modes the closing tag is left out for the tags
            # listed in HTML_OPTIONAL_END_TAGS.
            if not (self.mode.startswith("html") and node.tagName in HTML_OPTIONAL_END_TAGS):
                yield ir.TextNode(f"</{node.tagName}>", guard)
        elif node.tagName in HTML_REQUIRED_END_TAGS:
            # Empty element whose tag is in HTML_REQUIRED_END_TAGS: always
            # written as an explicit open/close pair.
            yield ir.TextNode(f"></{node.tagName}>", guard)
        elif self.mode.startswith("html"):
            if node.tagName in HTML_OPTIONAL_END_TAGS:
                yield ir.TextNode(">", guard)
            else:
                yield ir.TextNode(f"></{node.tagName}>", guard)
        else:
            # Empty element outside the html modes: self-closing syntax.
            yield ir.TextNode("/>", guard)

    @annotate
    def _compile_replace(self, node):
        """Compile ``py:replace``: yield the ``value`` attribute as an expression."""
        expression = node.getAttribute("value")
        yield ir.ExprNode(expression)

    @annotate
    def _compile_pi(self, node):
        """Compile a processing instruction into a :class:`.ir.PythonNode`.

        The stripped instruction data becomes the body of the Python node.
        Nodes flagged as ``module_level`` by :class:`.ir.PythonNode` are
        stored in the compiler's module wide code registry and nothing is
        yielded; any other node is yielded in place.
        """
        source_text = ir.TextNode(node.data.strip())
        python_node = ir.PythonNode(source_text)
        if not python_node.module_level:
            yield python_node
            return
        self.mod_py.append(python_node)

    @annotate
    def _compile_import(self, node):
        """Compile ``py:import`` into an :class:`.ir.ImportNode`.

        The ``href`` attribute is always passed; ``alias`` is passed as a
        second argument only when the attribute is present.
        """
        import_args = [node.getAttribute("href")]
        if node.hasAttribute("alias"):
            import_args.append(node.getAttribute("alias"))
        yield ir.ImportNode(*import_args)

    @annotate
    def _compile_extends(self, node):
        """Compile ``py:extends``.

        Marks the template being compiled as a child template, yields an
        :class:`.ir.ExtendNode` for the ``href`` attribute and then the
        compiled children of the directive.
        """
        self.is_child = True
        parent_href = node.getAttribute("href")
        yield ir.ExtendNode(parent_href)
        for compiled_child in self._compile_nop(node):
            yield compiled_child

    @annotate
    def _compile_include(self, node):
        """Compile ``py:include`` into an :class:`.ir.IncludeNode` for ``href``."""
        included_href = node.getAttribute("href")
        yield ir.IncludeNode(included_href)

    @annotate
    def _compile_block(self, node):
        """Compile ``py:block`` and register it as a template function.

        The block body (a single :class:`.ir.PassNode` when the block is
        empty) is stored in the functions registry under
        ``_kj_block_<name>()``.

        In a child template nothing is yielded; instead a statement binding
        ``parent_block`` to the parent's version of the block is put at the
        start of the body.  Otherwise an expression calling the block is
        yielded in place.
        """
        block_function = "_kj_block_" + node.getAttribute("name")
        declaration = f"{block_function}()"
        statements = list(self._compile_nop(node)) or [ir.PassNode()]
        self.functions[declaration] = statements

        if not self.is_child:
            yield ir.ExprNode(declaration)
            return

        # The registry holds this very list, so the insertion below is
        # visible through self.functions as well.
        binding = ir.TextNode(f"parent_block=parent.{block_function}")
        statements.insert(0, ir.PythonNode(binding))

    @annotate
    def _compile_def(self, node):
        """Compile ``py:def``.

        The children are compiled with ``self.in_def`` set.  A definition
        nested inside another ``py:def`` is yielded as an
        :class:`.ir.InnerDefNode`; a top-level definition is stored in the
        functions registry under the value of its ``function`` attribute and
        nothing is yielded.
        """
        enclosing_def = self.in_def
        self.in_def = True
        body = list(self._compile_nop(node))
        self.in_def = enclosing_def

        if not enclosing_def:
            self.functions[node.getAttribute("function")] = body
            return
        yield ir.InnerDefNode(node.getAttribute("function"), *body)

    @annotate
    def _compile_call(self, node):
        """Compile ``py:call`` into an :class:`.ir.CallNode`.

        The caller declaration is ``$caller(<args>)``, where ``<args>``
        comes from the ``args`` attribute of the first child when it has
        one.  In the ``function`` attribute every ``%caller`` is rewritten
        to ``$caller``.  The compiled children form the body of the call.
        """
        first_child = node.childNodes[0]
        caller_args = first_child.getAttribute("args") if first_child.hasAttribute("args") else ""
        caller_decl = f"$caller({caller_args})"
        target = node.getAttribute("function").replace("%caller", "$caller")
        yield ir.CallNode(caller_decl, target, *self._compile_nop(node))

    @annotate
    def _compile_text(self, node):
        """Compile a text node by running it through :class:`._TextCompiler`.

        Text whose parent tag is listed in ``HTML_CDATA_TAGS`` is compiled
        with plain :class:`.ir.TextNode` instances, so script and style
        content is never made translatable.
        """
        compiler_options = {}
        parent = node.parentNode
        if parent and parent.tagName in HTML_CDATA_TAGS:
            compiler_options["node_type"] = ir.TextNode

        text_compiler = _TextCompiler(
            self.filename,
            node.data,
            node.lineno,
            compiler_instance=self,
            **compiler_options,
        )
        for compiled in text_compiler:
            yield compiled

    @annotate
    def _compile_comment(self, node):
        """Compile a comment node.

        Comments whose text starts with ``!`` produce no output; all others
        are emitted as ``<!-- ... -->`` text.
        """
        if node.data.startswith("!"):
            return
        yield ir.TextNode(f"<!-- {node.data} -->")

    @annotate
    def _compile_for(self, node):
        """Compile ``py:for`` into an :class:`.ir.ForNode` built from ``each``."""
        loop_header = node.getAttribute("each")
        loop_body = list(self._compile_nop(node))
        yield ir.ForNode(loop_header, *loop_body)

    @annotate
    def _compile_with(self, node):
        """Compile ``py:with`` into an :class:`.ir.WithNode` built from ``vars``."""
        assignments = node.getAttribute("vars")
        scoped_body = list(self._compile_nop(node))
        yield ir.WithNode(assignments, *scoped_body)

    @annotate
    def _compile_switch(self, node):
        """Compile ``py:switch`` into an :class:`.ir.SwitchNode`.

        Whitespace-only text between the branches is dropped.  Every other
        compiled child must be a :class:`.ir.CaseNode` or an
        :class:`.ir.ElseNode`.

        :raises XMLTemplateCompileError: when the directive contains
            anything else.
        """
        allowed_branches = (ir.CaseNode, ir.ElseNode)
        branches = []

        for compiled in self._compile_nop(node):
            is_blank_text = isinstance(compiled, ir.TextNode) and not compiled.text.strip()
            if is_blank_text:
                continue
            if isinstance(compiled, allowed_branches):
                branches.append(compiled)
                continue
            msg = "py:switch directive can only contain py:case and py:else nodes and cannot be placed on a tag."
            raise XMLTemplateCompileError(
                msg,
                doc=self.doc,
                filename=self.filename,
                linen=node.lineno,
            )

        yield ir.SwitchNode(node.getAttribute("test"), *branches)

    @annotate
    def _compile_match(self, node):
        """Compile ``py:match`` into an :class:`.ir.MatchNode`.

        Whitespace-only text between the cases is dropped.  Every other
        compiled child must be a :class:`.ir.MatchCaseNode`.

        :raises XMLTemplateCompileError: when running on a Python older
            than 3.10, or when the directive contains anything other than
            match cases.
        """

        def fail(message):
            # Both failure modes are reported against the directive itself.
            return XMLTemplateCompileError(
                message,
                doc=self.doc,
                filename=self.filename,
                linen=node.lineno,
            )

        if version_info < (3, 10):
            msg = "At least Python 3.10 is required to use the py:match directive"
            raise fail(msg)

        cases = []
        for compiled in self._compile_nop(node):
            if isinstance(compiled, ir.TextNode) and not compiled.text.strip():
                continue
            if not isinstance(compiled, ir.MatchCaseNode):
                msg = "py:match directive can only contain py:case nodes and cannot be placed on a tag."
                raise fail(msg)
            cases.append(compiled)

        yield ir.MatchNode(node.getAttribute("on"), *cases)

    @annotate
    def _compile_case(self, node):
        """Compile ``py:case``.

        A non-empty ``value`` attribute gives a :class:`.ir.CaseNode` (the
        ``py:switch`` flavour); otherwise a non-empty ``match`` attribute
        gives a :class:`.ir.MatchCaseNode` (the ``py:match`` flavour).

        :raises XMLTemplateCompileError: when neither attribute is set.
        """
        switch_value = node.getAttribute("value")
        if switch_value:
            yield ir.CaseNode(switch_value, *list(self._compile_nop(node)))
            return

        match_pattern = node.getAttribute("match")
        if match_pattern:
            yield ir.MatchCaseNode(match_pattern, *list(self._compile_nop(node)))
            return

        msg = "case must have either value or match attribute, the former for py:switch, the latter for py:match"
        raise XMLTemplateCompileError(
            msg,
            doc=self.doc,
            filename=self.filename,
            linen=node.lineno,
        )

    @annotate
    def _compile_if(self, node):
        """Compile ``py:if`` into an :class:`.ir.IfNode` built from ``test``."""
        condition = node.getAttribute("test")
        guarded_body = list(self._compile_nop(node))
        yield ir.IfNode(condition, *guarded_body)

    @annotate
    def _compile_else(self, node):
        """Compile ``py:else`` into an :class:`.ir.ElseNode`.

        The directive is accepted when its parent is a ``py:nop`` element,
        when its parent carries a ``py:switch`` attribute, or when the
        previous sibling is a ``py:if`` element.

        :raises XMLTemplateCompileError: when none of the above holds.
        """
        placement_ok = (
            getattr(node.parentNode, "tagName", "") == "py:nop"
            or node.parentNode.hasAttribute("py:switch")
            or getattr(node.previousSibling, "tagName", "") == "py:if"
        )
        if not placement_ok:
            msg = (
                "py:else directive must be inside a py:switch or directly after py:if "
                "without text or spaces in between"
            )
            raise XMLTemplateCompileError(
                msg,
                doc=self.doc,
                filename=self.filename,
                linen=node.lineno,
            )

        else_body = list(self._compile_nop(node))
        yield ir.ElseNode(*else_body)

    @annotate
    def _compile_nop(self, node):
        """Compile the children of *node*, emitting nothing for *node* itself."""
        for child in node.childNodes:
            for compiled in self._compile_node(child):
                yield compiled


def make_text_node(text, guard=None):
    """Create the IR node for a piece of template text.

    Text that contains something besides whitespace becomes a
    :class:`.ir.TranslatableTextNode`; whitespace-only or empty text becomes
    a plain :class:`.ir.TextNode`, which spares the cost of translating
    empty nodes.
    """
    node_class = ir.TranslatableTextNode if text.strip() else ir.TextNode
    return node_class(text, guard)


class _TextCompiler:
    """Separates expressions such as ${some_var} from the ordinary text
    around them in the template source and generates :class:`.ir.ExprNode`
    instances and :class:`.ir.TextNode` instances accordingly.
    """

    def __init__(
        self,
        filename,
        source,
        lineno,
        node_type=make_text_node,
        in_html_attr=False,  # noqa: FBT002
        compiler_instance=None,
    ):
        """Prepare the compilation of one piece of text.

        :param filename: template name used when reporting errors.
        :param source: the text to split into text and expression nodes.
        :param lineno: line number of *source* inside the template.
        :param node_type: callable creating the nodes for ordinary text.
        :param in_html_attr: whether *source* is an attribute value;
            expression nodes are then created with ``safe=True``.
        :param compiler_instance: the compiler using this object; its
            ``doc`` attribute is kept for error reporting.

        NOTE(review): ``compiler_instance`` defaults to ``None`` but its
        ``doc`` attribute is read unconditionally, so callers have to pass a
        compiler.
        """
        # What is being compiled and where it comes from.
        self.filename = filename
        self.source = source
        self.orig_lineno = lineno

        # How the nodes are produced.
        self.node_type = node_type
        self.in_html_attr = in_html_attr
        self.compiler_instance = compiler_instance
        self.doc = compiler_instance.doc

        # Scanning state: lines consumed so far and position in the source.
        self.lineno = 0
        self.pos = 0

    def text(self, text):
        """Create a text node for *text* and account for the lines it spans.

        The node gets the current real line number; the line counter is
        advanced afterwards by the number of newlines in *text*.
        """
        text_node = self.node_type(text)
        text_node.lineno = self.real_lineno
        newline_count = text.count("\n")
        self.lineno = self.lineno + newline_count
        return text_node

    def expr(self, text):
        """Create an expression node for *text* and account for its lines.

        Inside HTML attributes the node is flagged as *safe*: attributes are
        always escaped in the end, so this avoids escaping twice.
        """
        expr_node = ir.ExprNode(text, safe=self.in_html_attr)
        expr_node.lineno = self.real_lineno
        newline_count = text.count("\n")
        self.lineno = self.lineno + newline_count
        return expr_node

    @property
    def real_lineno(self):
        """Template line reached so far: starting line plus lines consumed."""
        lines_consumed = self.lineno
        return self.orig_lineno + lines_consumed

    # Markers recognised inside template text, all introduced by "$":
    #   * expr_named  -- "$name" / "$name.attr": an identifier with optional dots;
    #   * expr_braced -- "${": only the opening is matched here (the group is
    #     empty); the end of the expression is found by _get_braced_expr;
    #   * "$$"        -- escaped dollar sign (neither named group matches).
    # The pattern is written in verbose mode and matched case-insensitively.
    _pattern = r"""
    \$(?:
        (?P<expr_named>[_a-z][_a-z0-9.]*) | # $foo.bar
        {(?P<expr_braced>) | # ${....
        \$ # $$ -> $
    )"""
    _re_pattern = re.compile(_pattern, re.VERBOSE | re.IGNORECASE | re.MULTILINE)

    def __iter__(self):
        """Yield the IR nodes for the source, in document order.

        Ordinary text is turned into nodes by :meth:`text`, ``$name`` and
        ``${...}`` into expression nodes, and ``$$`` into a literal ``$``.

        A braced expression may itself contain ``$`` markers, for instance
        inside a string literal (``${"$foo"}``).  The regular expression
        still reports those, but they belong to the Python expression that
        :meth:`_get_braced_expr` has already consumed, so any match starting
        before ``self.pos`` is skipped.  Without that check the scan position
        would move backwards and part of the source would be emitted twice.
        """
        source = self.source
        for mo in self._re_pattern.finditer(source):
            start = mo.start()
            if start < self.pos:
                # Inside an already consumed ${...} expression.
                continue
            if start > self.pos:
                # Plain text between the previous marker and this one.
                yield self.text(source[self.pos : start])
            self.pos = mo.end()
            groups = mo.groupdict()
            if groups["expr_braced"] is not None:
                # self.pos is just past "${"; the helper moves it past "}".
                yield self._get_braced_expr()
            elif groups["expr_named"] is not None:
                yield self.expr(groups["expr_named"])
            else:
                # "$$" is the escape for a literal dollar sign.
                yield self.text("$")
        if self.pos != len(source):
            # Trailing text after the last marker.
            yield self.text(source[self.pos :])

    def _get_braced_expr(self):
        """Consume a ``${...}`` expression and return its expression node.

        Called with ``self.pos`` just past the opening ``${``.  The closing
        brace is located by compiling the rest of the source as a Python
        expression: the stray ``}`` makes the compilation fail, and the
        position reported by the :class:`SyntaxError` is taken as the end of
        the expression.  For a source such as ``1+1=${1+1} ahah`` the result
        is ``self.expr('1+1')``, with ``self.pos`` moved just past the
        closing brace.

        See https://github.com/nandoflorestan/kajiki/pull/38

        Errors are reported with :attr:`real_lineno`, the line inside the
        template, rather than with the offset relative to the start of this
        text.

        :raises XMLTemplateCompileError: when the expression is not
            terminated, cannot be compiled, or is not valid Python.
        """

        def py_expr(end=None):
            return self.source[self.pos : end]

        try:
            # Skip leading whitespace, then try to compile everything that
            # follows; a well-formed template fails here on the closing "}".
            self.pos += len(py_expr()) - len(py_expr().lstrip())
            compile(py_expr(), "find_}", "eval")
        except SyntaxError as se:
            # Turn the (line, column) of the error into an index in the
            # source: start position, plus the column, plus the length of
            # every full line before the one holding the error.
            end = sum(
                [self.pos, se.offset]
                + [len(line) + 1 for idx, line in enumerate(py_expr().splitlines()) if idx < se.lineno - 1]
            )
            if py_expr(end)[-1] != "}":
                # for example unclosed strings
                msg = f"Kajiki can't compile the python expression `{py_expr()[:-1]}`"
                raise XMLTemplateCompileError(
                    msg,
                    doc=self.doc,
                    filename=self.filename,
                    linen=self.real_lineno,
                ) from None

            # if the expression ends in a } then it may be valid
            try:
                compile(py_expr(end - 1), "check_validity", "eval")
            except SyntaxError:
                # for example + operators with a single operand
                msg = f"Kajiki detected an invalid python expression `{py_expr()[:-1]}`"
                raise XMLTemplateCompileError(
                    msg,
                    doc=self.doc,
                    filename=self.filename,
                    linen=self.real_lineno,
                ) from None

            py_text = py_expr(end - 1)
            self.pos = end
            return self.expr(py_text)
        else:
            # Everything up to the end of the source compiled cleanly, so
            # there was no closing brace at all.
            msg = "Braced expression not terminated"
            raise XMLTemplateCompileError(
                msg,
                doc=self.doc,
                filename=self.filename,
                linen=self.real_lineno,
            )


class _Parser(sax.ContentHandler):
    """Parse an XML template into a Tree of DOM Nodes.

    Nodes should then be passed to a `_Compiler` to be
    converted into the intermediate representation and
    then to Python Code.
    """

    DTD = '<!DOCTYPE kajiki SYSTEM "kajiki.dtd">'

    def __init__(self, filename, source):
        """Prepare the parsing of *source*.

        XML itself defines only a handful of entities while HTML defines
        many more, and the XML parser rejects the HTML ones unless the
        document refers to an external DTD.  In that case
        ``skippedEntity()`` is called instead, which is what we want: expat
        is tricked into accepting XML plus HTML entities, and users do not
        have to declare everyday HTML entities themselves.

        To get there the doctype found in the template is taken out and
        replaced, for the parsing stage only, by Kajiki's own; the original
        one is kept on the document so the compiler can put it back.

        :param filename: name of the template, used as system id.
        :param source: template text.
        :raises TypeError: when *source* is not a ``str``.
        """
        sax.ContentHandler.__init__(self)
        if not isinstance(source, str):
            msg = "The template source must be a unicode string."
            raise TypeError(msg)

        self._filename = filename
        self._doc = dom.Document()
        self._els = []

        # Keep the user's DTD on the document for the compiler...
        user_dtd, dtd_position, stripped_source = extract_dtd(source)
        self._doc._dtd = user_dtd  # noqa: SLF001
        # ...and parse with our own DTD put in the same place.
        self._source = stripped_source[:dtd_position] + self.DTD + stripped_source[dtd_position:]
        self._cdata_stack = []

    def parse(self):
        """Parse the prepared source and return the resulting DOM document.

        A SAX parser is created with external parameter/general entities and
        namespace processing switched off, and with this object registered
        both as content handler and as lexical handler.  The parsed source is
        also stored on the returned document as ``_source``.

        :raises XMLTemplateParseError: when the SAX parser rejects the
            source; the original SAX exception is not chained.
        """
        reader = sax.make_parser()  # noqa: S317
        self._parser = reader
        disabled_features = (
            sax.handler.feature_external_pes,
            sax.handler.feature_external_ges,
            sax.handler.feature_namespaces,
        )
        for feature in disabled_features:
            reader.setFeature(feature, False)
        reader.setProperty(sax.handler.property_lexical_handler, self)
        reader.setContentHandler(self)

        input_source = sax.xmlreader.InputSource()
        input_source.setCharacterStream(io.StringIO(self._source))
        input_source.setSystemId(self._filename)

        try:
            reader.parse(input_source)
        except SAXParseException as exc:
            raise XMLTemplateParseError(
                exc.getMessage(),
                self._source,
                self._filename,
                exc.getLineNumber(),
                exc.getColumnNumber(),
            ) from None

        document = self._doc
        document._source = self._source  # noqa: SLF001
        return document

    # ContentHandler implementation
    def startDocument(self):  # noqa: N802
        """Start of the document: the document node becomes the first open node."""
        document = self._doc
        self._els.append(document)

    def startElement(self, name, attrs):  # noqa: N802
        """Create the element *name*, attach it and make it the open node.

        The element records the parser's current line number as ``lineno``,
        receives every attribute in *attrs*, is appended to the innermost
        open node and is then pushed on the stack of open nodes.
        """
        element = self._doc.createElement(name)
        element.lineno = self._parser.getLineNumber()
        for attr_name, attr_value in attrs.items():
            element.setAttribute(attr_name, attr_value)

        parent = self._els[-1]
        parent.appendChild(element)
        self._els.append(element)

    def endElement(self, name):  # noqa: N802
        """Close the innermost open element, which must be named *name*."""
        closed = self._els.pop()
        assert closed.tagName == name  # noqa: S101

    def characters(self, content):
        """Append *content* as a text node to the innermost open node.

        Outside CDATA sections the text is XML-escaped; inside them it is
        kept verbatim.  The node remembers which of the two happened in its
        ``escaped`` attribute, and the parser's line number in ``lineno``.
        """
        inside_cdata = bool(self._cdata_stack)
        text = content if inside_cdata else sax.saxutils.escape(content)

        text_node = self._doc.createTextNode(text)
        text_node.lineno = self._parser.getLineNumber()
        text_node.escaped = not inside_cdata
        self._els[-1].appendChild(text_node)

    def processingInstruction(self, target, data):  # noqa: N802
        """Append a processing instruction node to the innermost open node."""
        instruction = self._doc.createProcessingInstruction(target, data)
        instruction.lineno = self._parser.getLineNumber()
        parent = self._els[-1]
        parent.appendChild(instruction)

    def skippedEntity(self, name):  # noqa: N802
        """Replace an HTML entity such as ``&nbsp;`` with its character(s).

        XML defines very few entities.  Because the parsed source carries a
        SYSTEM doctype, expat cannot rule out that an unknown entity is
        declared there and, being non-validating, never reads the external
        doctype; it hands the entity over to SAX, which ends up here.

        The entity is looked up in ``html.entities.html5`` and the result is
        fed to :meth:`characters`.
        """
        # html.entities.html5 (copied from CPython, not maintained here)
        # lists some entities both with and without the trailing semicolon;
        # always use the spelling with the semicolon.
        entity_key = name
        if entity_key and not entity_key.endswith(";"):
            entity_key = f"{entity_key};"
        return self.characters(html.entities.html5[entity_key])

    @abc.abstractmethod
    def startElementNS(self, name, qname, attrs):  # noqa: N802
        """Namespace-aware start of an element; deliberately does nothing.

        ``parse()`` switches the SAX namespaces feature off, so this is
        presumably never called.
        """

    @abc.abstractmethod
    def endElementNS(self, name, qname):  # noqa: N802
        """Namespace-aware end of an element; deliberately does nothing.

        ``parse()`` switches the SAX namespaces feature off, so this is
        presumably never called.
        """

    @abc.abstractmethod
    def startPrefixMapping(self, prefix, uri):  # noqa: N802
        """Start of a namespace prefix mapping; deliberately does nothing.

        ``parse()`` switches the SAX namespaces feature off, so this is
        presumably never called.
        """

    @abc.abstractmethod
    def endPrefixMapping(self, prefix):  # noqa: N802
        """End of a namespace prefix mapping; deliberately does nothing.

        ``parse()`` switches the SAX namespaces feature off, so this is
        presumably never called.
        """

    # LexicalHandler implementation
    def comment(self, text):
        """Lexical handler: append a comment node to the innermost open node."""
        comment_node = self._doc.createComment(text)
        comment_node.lineno = self._parser.getLineNumber()
        parent = self._els[-1]
        parent.appendChild(comment_node)

    def startCDATA(self):  # noqa: N802
        """Lexical handler: a CDATA section opens.

        A text node holding the literal ``<![CDATA[`` marker, flagged with
        ``_cdata``, is appended to the innermost open node, and that node is
        pushed on the CDATA stack so that :meth:`characters` stops escaping.
        """
        parent = self._els[-1]
        marker = self._doc.createTextNode("<![CDATA[")
        marker._cdata = True  # noqa: SLF001
        marker.lineno = self._parser.getLineNumber()
        parent.appendChild(marker)
        self._cdata_stack.append(parent)

    def endCDATA(self):  # noqa: N802
        """Lexical handler: a CDATA section closes.

        A text node holding the literal ``]]>`` marker, flagged with
        ``_cdata``, is appended to the innermost open node, and the CDATA
        stack is popped.
        """
        parent = self._els[-1]
        marker = self._doc.createTextNode("]]>")
        marker._cdata = True  # noqa: SLF001
        marker.lineno = self._parser.getLineNumber()
        parent.appendChild(marker)
        self._cdata_stack.pop()

    def startDTD(self, name, pubid, sysid):  # noqa: N802
        """Lexical handler: record the document type on the document."""
        doctype = impl.createDocumentType(name, pubid, sysid)
        self._doc.doctype = doctype

    def endDTD(self):  # noqa: N802
        """Lexical handler: end of the DTD; nothing to do."""
        return None


class _DomTransformer:
    """Applies standard Kajiki transformations to a parsed document.

    Given a document generated by :class:`.Parser` it applies some
    node transformations that are necessary before applying the
    compilation steps to achieve the result we usually expect.

    This includes things like squashing consecutive text nodes
    and expanding ``py:`` directives.

    The Transformer mutates the original document.
    """

    def __init__(self, doc, strip_text=True):  # noqa: FBT002
        """Remember the document to transform.

        :param doc: the parsed DOM document; :meth:`transform` modifies
                    it in place.
        :param strip_text: when true, :meth:`transform` additionally
                           strips surrounding whitespace from every
                           non-CDATA text node.
        """
        self._transformed = False
        self.doc = doc
        self._strip_text = strip_text

    def transform(self):
        """Applies all the DOM transformations to the document.

        Calling this twice will do nothing as the result is persisted:
        the first successful run is remembered and later calls simply
        return the already transformed document.

        :return: the transformed document.
        """
        if self._transformed:
            return self.doc

        self.doc = self._expand_directives(self.doc)
        self.doc = self._merge_text_nodes(self.doc)
        self.doc = self._extract_nodes_leading_and_trailing_spaces(self.doc)
        if self._strip_text:
            self.doc = self._strip_text_nodes(self.doc)
        # Only mark the document as done once every pass succeeded, so
        # that the passes are never applied a second time to a tree
        # they already rewrote.
        self._transformed = True
        return self.doc

    @classmethod
    def _merge_text_nodes(cls, tree):
        """Merges consecutive TextNodes into a single TextNode.

        Nodes are replaced with a new node whose data contains the
        concatenation of all replaced nodes data.
        Any other node (including CDATA TextNodes) splits runs of TextNodes.

        :param tree: a Document or a node; nodes without a string
                     ``tagName`` are returned untouched.
        :return: the same ``tree`` object that was passed in.
        """
        if isinstance(tree, dom.Document):
            cls._merge_text_nodes(tree.firstChild)
            return tree
        if not isinstance(getattr(tree, "tagName", None), str):
            return tree

        # Squash all successive text nodes into a single one.
        # Iterate over a copy because children are replaced/removed
        # while walking them.
        merge_node = None
        for child in list(tree.childNodes):
            if isinstance(child, dom.Text) and not getattr(child, "_cdata", False):
                if merge_node is None:
                    # First node of a run: the merged node inherits its
                    # line number and escaping flag.
                    merge_node = child.ownerDocument.createTextNode(child.data)
                    merge_node.lineno = child.lineno
                    merge_node.escaped = child.escaped
                    tree.replaceChild(newChild=merge_node, oldChild=child)
                else:
                    merge_node.data = merge_node.data + child.data
                    tree.removeChild(child)
            else:
                merge_node = None

        # Apply squashing to all children of current node.
        for child in tree.childNodes:
            if not isinstance(child, dom.Text):
                cls._merge_text_nodes(child)

        return tree

    @classmethod
    def _extract_nodes_leading_and_trailing_spaces(cls, tree):
        """Extract the leading and trailing spaces of TextNodes to
        separate nodes.

        This is explicitly intended to make i18n easier, as we don't
        want people having to pay attention to spaces at the beginning
        and end of text when translating it. So those are always
        extracted and only the meaningful part is preserved for
        translation.

        CDATA text nodes and whitespace-only text nodes are left as they
        are; non-text children are processed recursively.

        :return: the same ``tree`` object that was passed in.
        """
        # Walk a snapshot: whitespace nodes are inserted next to the
        # node being processed, and the live list must not shift under
        # the iteration. The inserted nodes are whitespace-only, so they
        # never need processing themselves.
        for child in list(tree.childNodes):
            if isinstance(child, dom.Text):
                if not getattr(child, "_cdata", False):
                    if not child.data.strip():
                        # Already a totally empty node, do nothing...
                        continue

                    lstripped_data = child.data.lstrip()
                    if len(lstripped_data) != len(child.data):
                        # There is text to strip at begin, create a
                        # new text node with empty space
                        empty_text_len = len(child.data) - len(lstripped_data)
                        empty_text = child.data[:empty_text_len]
                        begin_node = child.ownerDocument.createTextNode(empty_text)
                        begin_node.lineno = child.lineno
                        begin_node.escaped = child.escaped
                        tree.insertBefore(newChild=begin_node, refChild=child)
                        # The meaningful text now starts after the
                        # newlines that moved into begin_node.
                        child.lineno += child.data[:empty_text_len].count("\n")
                        child.data = lstripped_data

                    rstripped_data = child.data.rstrip()
                    if len(rstripped_data) != len(child.data):
                        # There is text to strip at end, create a new
                        # text node with empty space
                        empty_text_len = len(child.data) - len(rstripped_data)
                        empty_text = child.data[-empty_text_len:]
                        end_node = child.ownerDocument.createTextNode(empty_text)
                        end_node.lineno = child.lineno + child.data[:-empty_text_len].count("\n")
                        end_node.escaped = child.escaped
                        # Swap in end_node, then put child back right
                        # before it: the net effect is inserting
                        # end_node after child.
                        tree.replaceChild(newChild=end_node, oldChild=child)
                        tree.insertBefore(newChild=child, refChild=end_node)
                        child.data = rstripped_data
            else:
                cls._extract_nodes_leading_and_trailing_spaces(child)
        return tree

    @classmethod
    def _strip_text_nodes(cls, tree):
        """Strips empty characters in all text nodes.

        CDATA text nodes are left untouched; non-text children are
        processed recursively.

        :return: the same ``tree`` object that was passed in.
        """
        for child in tree.childNodes:
            if isinstance(child, dom.Text):
                if not getattr(child, "_cdata", False):
                    # Move lineno forward the amount of lines we are
                    # going to strip.
                    lstripped_data = child.data.lstrip()
                    child.lineno += child.data[: len(child.data) - len(lstripped_data)].count("\n")
                    child.data = child.data.strip()
            else:
                cls._strip_text_nodes(child)
        return tree

    @classmethod
    def _expand_directives(cls, tree, parent=None):
        """Expands directives attached to nodes into separate nodes.

        This will convert all instances of::

            <div py:if="check">
            </div>

        into::

            <py:if test="check">
                <div>
                </div>
            </py:if>

        This ensures that whenever a template is processed there is no
        difference between the two formats as the Compiler will always
        receive the latter.

        :param tree: the Document or node to expand; nodes without a
                     string ``tagName`` are returned untouched.
        :param parent: the node currently containing ``tree``; it is
                       needed to swap ``tree`` for its wrapping
                       directive element.
        :return: the newly created directive element when ``tree`` got
                 wrapped, otherwise ``tree`` itself.
        """
        if isinstance(tree, dom.Document):
            cls._expand_directives(tree.firstChild, tree)
            return tree
        if not isinstance(getattr(tree, "tagName", None), str):
            return tree
        if tree.tagName in QDIRECTIVES_DICT:
            # The node is itself a directive tag: turn it into the
            # attribute form on a neutral ``py:nop`` element so that the
            # generic attribute expansion below handles it.
            attrs = QDIRECTIVES_DICT[tree.tagName]
            if not isinstance(attrs, tuple):
                attrs = [attrs]
            for attr in attrs:
                tree.setAttribute(tree.tagName, tree.getAttribute(attr))
            tree.tagName = "py:nop"
        if tree.tagName != "py:nop" and tree.hasAttribute("py:extends"):
            # ``py:extends`` becomes the first child of the node rather
            # than a wrapper around it.
            value = tree.getAttribute("py:extends")
            el = tree.ownerDocument.createElement("py:extends")
            el.setAttribute("href", value)
            el.lineno = tree.lineno
            tree.removeAttribute("py:extends")
            tree.childNodes.insert(0, el)
        for directive, attr in QDIRECTIVES:
            if not tree.hasAttribute(directive):
                continue
            value = tree.getAttribute(directive)
            tree.removeAttribute(directive)
            el = tree.ownerDocument.createElement(directive)
            el.lineno = tree.lineno
            if isinstance(attr, tuple):
                # eg: handle bare py:case tags
                for at in attr:
                    el.setAttribute(at, dict(tree.attributes.items()).get(at))
                if directive == "py:case" and tree.nodeName != "py:case":
                    # ``py:case`` used as an attribute: the attribute
                    # receiving the value depends on whether the
                    # enclosing node is a ``py:match``.
                    if tree.parentNode.nodeName == "py:match" or "py:match" in tree.parentNode.attributes:
                        at = "on"
                    else:
                        at = "value"
                    el.setAttribute(at, value)
            elif attr:
                el.setAttribute(attr, value)
            parent.replaceChild(newChild=el, oldChild=tree)
            el.appendChild(tree)
            # Only one directive is peeled off per call: recursing on the
            # same node expands its remaining directives (nested inside
            # ``el``) and eventually its children.
            cls._expand_directives(tree, el)
            return el
        for child in tree.childNodes:
            cls._expand_directives(child, tree)
        return tree


class XMLTemplateError(template.KajikiTemplateError):
    """Base class for all Parse/Compile errors.

    It adds no behavior of its own; it exists so that the XML template
    parse and compile errors share a common ancestor that can be caught
    in a single ``except`` clause.
    """


class XMLTemplateCompileError(XMLTemplateError):
    """Raised when a template violates a Kajiki constraint.

    Typical causes are directives used in a context where they are
    not allowed, or any other kajiki template rule that the provided
    template code fails to satisfy.
    """

    def __init__(self, msg, doc, filename, linen):
        """Build the error from the offending document.

        The template source is read from ``doc._source`` when that
        attribute exists; otherwise an empty string is used.
        """
        template_source = getattr(doc, "_source", "")
        # NOTE(review): the trailing 0 is presumably the column offset
        # expected by the base class -- confirm against KajikiTemplateError.
        super().__init__(msg, template_source, filename, linen, 0)


class XMLTemplateParseError(XMLTemplateError):
    """Error while parsing template XML.

    Signals an invalid XML error in the provided template code.
    """