File: TextWriter.py

package info (click to toggle)
python-xml 0.8.4-10.1%2Blenny1
  • links: PTS
  • area: main
  • in suites: lenny
  • size: 4,972 kB
  • ctags: 10,628
  • sloc: python: 46,730; ansic: 14,354; xml: 968; makefile: 201; sh: 20
file content (244 lines) | stat: -rw-r--r-- 9,745 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
########################################################################
#
# File Name:            TextWriter.py
#
#
"""
Implement the core Writer for XSLT processor output
WWW: http://4suite.com/4XSLT        e-mail: support@4suite.com

Copyright (c) 1999-2000 Fourthought Inc., USA.   All Rights Reserved.
See  http://4suite.com/COPYRIGHT  for license and copyright information
"""

import os, re, string, cStringIO
from xml.dom import EMPTY_NAMESPACE
import xml.dom.ext
from xml.dom.ext.Printer import TranslateCdata, TranslateCdataAttr
from xml.dom.html import TranslateHtmlCdata
from xml.xslt import XSL_NAMESPACE, TextSax
from xml.dom.html import HTML_4_TRANSITIONAL_INLINE, HTML_4_STRICT_INLINE
from xml.dom import XML_NAMESPACE
from xml.dom.html import HTML_FORBIDDEN_END


class ElementData:
    """
    Record describing an element whose start tag has not been written yet.

    name         -- qualified name to emit in the start tag
    cdataElement -- true if the element's text goes in a CDATA section
    attrs        -- mapping of attribute qualified name to value
    extraNss     -- additional prefix -> namespace URI bindings; any false
                    value (None, empty mapping) is replaced by a new dict
    """
    def __init__(self, name, cdataElement, attrs, extraNss=None):
        if not extraNss:
            extraNss = {}
        self.name, self.cdataElement = name, cdataElement
        self.attrs, self.extraNss = attrs, extraNss


class TextWriter:
    """
    Writer that serializes a stream of output events (startElement,
    attribute, text, comment, processingInstruction, endElement) into an
    in-memory text buffer, following the supplied output parameters
    (method, indent, encoding, doctype, ...).

    A start tag is not written when startElement() is called; it is kept
    in self._currElement so that later attribute() calls can still add to
    it, and is flushed by _completeLastElement() as soon as any other
    event arrives.

    NOTE(review): comment() and text() do not check self._first_element,
    so output produced before the first element is written ahead of the
    XML declaration emitted by _prolog().  Only processing instructions
    are deferred.  Confirm whether callers rely on this.
    """

    def __init__(self, outputParams):
        """
        outputParams -- object carrying the output settings read by this
                        class (method, indent, encoding, version, ...).
                        Its mediaType is set to 'text/plain' if unset.
        """
        # Element whose start tag is still open for attributes, or None
        self._currElement = None
        # Stack of prefix -> namespace URI mappings, one per open element
        self._namespaces = [{'': EMPTY_NAMESPACE, 'xml': XML_NAMESPACE}]
        self._result = cStringIO.StringIO()
        self._outputParams = outputParams
        self._outputParams.mediaType = outputParams.mediaType or 'text/plain'
        self._indent = ''
        # True when the next tag may be placed on a new line while indenting
        self._nextNewLine = 0
        # True while inside an open '<![CDATA[' section
        self._cdataSectionElement = 0
        self._first_element = 1
        # Stack of flags: is the open element in HTML_4_STRICT_INLINE
        self._strict_inline = [0]
        # Processing instructions received before the first element
        self._cachedPis = []
        return

    def _prolog(self, docElem):
        """
        Write the document prolog: the XML declaration and DOCTYPE (only
        for the 'xml' method, or when no method is set), followed by any
        processing instructions cached before the first element.

        docElem -- name of the document element, used in the DOCTYPE
        """
        params = self._outputParams
        if params.method == 'html' and params.indent is None:
            params.indent = 'yes'
        if params.method in [None, 'xml']:
            #FIXME: Case-sensitivity?
            if params.omitXmlDeclaration in [None, 'no']:
                # Fall back to '1.0' so an unset version is never written
                # as the literal string 'None'.
                self._result.write("<?xml version='%s' encoding='%s'" % (
                    params.version or '1.0', params.encoding or 'UTF-8'))
                if params.standalone:
                    self._result.write(" standalone='%s'" % params.standalone)
                self._result.write("?>\n")
            if params.doctypeSystem:
                # XML ExternalID is either  SYSTEM "sys"  or
                # PUBLIC "pub" "sys"; the two keywords never appear together.
                if params.doctypePublic:
                    external_id = 'PUBLIC "%s" "%s"' % (
                        params.doctypePublic, params.doctypeSystem)
                else:
                    external_id = 'SYSTEM "%s"' % params.doctypeSystem
                self._result.write(
                    '<!DOCTYPE %s %s>\n' % (docElem, external_id))
        for target, data in self._cachedPis:
            self._writePiOrXmlDecl(target, data)
            self._result.write('\n' + self._indent)
        self._nextNewLine = 0
        return

    def getResult(self):
        """Flush any pending start tag and return everything written."""
        self._completeLastElement(0)
        return self._result.getvalue()

    def text(self, text, escapeOutput=1):
        """
        Write character data.

        text         -- the string to write
        escapeOutput -- if true the text is passed through
                        TranslateHtmlCdata (html method) or TranslateCdata
                        (otherwise) first; if false it is written verbatim
        """
        self._completeLastElement(0)
        if escapeOutput:
            if text and text[0] == '>':
                # Hand the last two characters already written to the
                # translator along with the new text.  Reading to the end
                # leaves the buffer position at the end again.
                self._result.seek(-2, 2)
                last_chars = self._result.read()
            else:
                last_chars = ''
            encoding = self._outputParams.encoding or 'UTF-8'
            if self._outputParams.method == 'html':
                new_text = TranslateHtmlCdata(text, encoding, last_chars)
            else:
                new_text = TranslateCdata(
                    text,
                    encoding,
                    last_chars,
                    markupSafe=self._cdataSectionElement
                    )
            self._result.write(new_text)
        else:
            self._result.write(text)
        self._nextNewLine = 0
        return

    def attribute(self, name, value, namespace=EMPTY_NAMESPACE):
        """
        Add an attribute to the element whose start tag is still pending.

        name      -- qualified name of the attribute
        value     -- attribute value (escaped when the tag is written)
        namespace -- namespace URI bound to the attribute's prefix

        Must be called while a start tag is pending (after startElement()
        and before any other event); otherwise self._currElement is None
        and an AttributeError is raised.
        """
        self._currElement.attrs[name] = value
        (prefix, local) = xml.dom.ext.SplitQName(name)
        # Only a prefixed attribute contributes a namespace binding.  An
        # unprefixed attribute is in no namespace, so it must not replace
        # the element's default-namespace entry.
        # NOTE(review): relies on SplitQName returning a false prefix for
        # unprefixed names -- confirm against xml.dom.ext.
        if prefix and self._outputParams.method == 'xml':
            self._namespaces[-1][prefix] = namespace
        return

    def processingInstruction(self, target, data):
        """
        Write a processing instruction.  Instructions received before the
        first element are cached and written by _prolog() instead.
        """
        if self._first_element:
            self._cachedPis.append((target, data))
            return
        self._completeLastElement(0)
        self._writePiOrXmlDecl(target, data)
        return

    def _writePiOrXmlDecl(self, target, data):
        """Write '<?target data?>', indented on its own line if indenting."""
        pi = '<?%s %s?>' % (target, data)
        if self._outputParams.indent == 'yes':
            self._result.write("%s%s\n" % (self._indent, pi))
        else:
            self._result.write(pi)
        self._nextNewLine = 1
        return

    def comment(self, body):
        """Write '<!--body-->', indented on its own line if indenting."""
        self._completeLastElement(0)
        if self._outputParams.indent == 'yes':
            self._result.write(self._indent + "<!--%s-->\n" % (body))
        else:
            self._result.write("<!--%s-->" % (body))
        self._nextNewLine = 1
        return

    def startElement(self, name, namespace=EMPTY_NAMESPACE, extraNss=None):
        """
        Begin a new element.  The start tag is only recorded here and is
        written once the next event arrives.

        name      -- qualified name of the element
        namespace -- namespace URI of the element
        extraNss  -- optional extra prefix -> namespace URI bindings

        On the first element the output method is chosen if unset ('html'
        when the name is 'html' in any letter case, else 'xml') and the
        prolog is written.
        """
        extraNss = extraNss or {}
        upper_name = name.upper()
        self._strict_inline.append(upper_name in HTML_4_STRICT_INLINE)
        if self._first_element:
            if not self._outputParams.method:
                if upper_name == 'HTML':
                    self._outputParams.method = 'html'
                else:
                    self._outputParams.method = 'xml'
            self._first_element = 0
            self._prolog(name)
        self._completeLastElement(0)
        (prefix, local) = xml.dom.ext.SplitQName(name)
        is_xml = self._outputParams.method == 'xml'
        cdatas_flag = 0
        if is_xml:
            cdatas_flag = ((namespace, local)
                           in self._outputParams.cdataSectionElements)
        self._currElement = ElementData(name, cdatas_flag, {}, extraNss)
        # The new scope starts as a copy of the parent's bindings
        self._namespaces.append(self._namespaces[-1].copy())
        if is_xml:
            self._namespaces[-1][prefix] = namespace
        return

    def endElement(self, name):
        """
        Close the element called name: flush a still-pending start tag as
        an empty element, close an open CDATA section, write the end tag
        where one is needed, and pop the element's namespace and inline
        state.
        """
        if self._currElement:
            # Nothing was written since startElement(): the element is empty
            elementIsEmpty = 1
            endElementHandled = self._completeLastElement(1)
        else:
            elementIsEmpty = endElementHandled = 0
        params = self._outputParams
        indenting = params.indent == 'yes'
        if indenting:
            self._indent = self._indent[:-2]
        if params.method == 'xml' and self._cdataSectionElement:
            self._result.write(']]>')
            self._cdataSectionElement = 0
        is_html = params.method == 'html'
        # HTML elements listed in HTML_FORBIDDEN_END get no end tag at all
        if not (is_html and name.upper() in HTML_FORBIDDEN_END):
            break_line = indenting and self._nextNewLine and not elementIsEmpty
            if is_html and self._strict_inline[-1]:
                # Strict inline HTML elements are closed on the same line
                break_line = 0
            if break_line:
                self._result.write('\n' + self._indent)
            if not endElementHandled:
                self._result.write('</%s>' % name)
        self._nextNewLine = 1
        del self._namespaces[-1]
        self._strict_inline.pop()
        return

    def _completeLastElement(self, elementIsEmpty):
        """
        Write the pending start tag, if there is one, with its attributes
        and (for the xml method) its namespace declarations.

        elementIsEmpty -- true when called from endElement() for an
                          element without content

        Returns 0 if an empty html element was written as '<name>' so the
        caller still has to produce the end tag, otherwise 1.
        """
        endElementHandled = 1
        elem = self._currElement
        if not elem:
            return endElementHandled

        params = self._outputParams
        write = self._result.write
        indenting = params.indent == 'yes'
        if indenting and self._nextNewLine and not self._strict_inline[-1]:
            write('\n' + self._indent)
        write('<' + elem.name)
        encoding = params.encoding or 'UTF-8'
        for name, value in elem.attrs.items():
            value = TranslateCdata(value, encoding)
            value, delimiter = TranslateCdataAttr(value)
            write(' %s=%s%s%s' % (name, delimiter, value, delimiter))
        if params.method == 'xml':
            #Handle namespaces
            # Work on a copy so the caller's extraNss dict is not modified
            nss = dict(elem.extraNss)
            nss.update(self._namespaces[-1])
            inherited = self._namespaces[-2]
            for prefix in nss.keys():
                ns = nss[prefix]
                # Declare a binding that is new, or that differs from the
                # one inherited from the parent scope.
                if ns and ns != inherited.get(prefix, None):
                    if prefix:
                        write(" xmlns:%s='%s'" % (prefix, ns))
                    else:
                        write(" xmlns='%s'" % ns)
            self._namespaces[-1] = nss
        if elementIsEmpty:
            if params.method != 'html':
                write('/>')
            else:
                # html has no '/>' form; endElement() writes the end tag
                write('>')
                endElementHandled = 0
        else:
            write('>')
            if elem.cdataElement:
                write('<![CDATA[')
                self._cdataSectionElement = 1
        self._nextNewLine = 1

        if indenting:
            self._indent = self._indent + '  '
        self._currElement = None
        return endElementHandled




"""
Note: excerpt from Mike Kay on xsl-list 2000-06-14

In Saxon, for method="html", the highly pragmatic rules for output of
non-ASCII characters are:

for characters in the range 160-255, use an entity reference, e.g.
"&eacute;"
for other non-ASCII characters, use the native character if supported by the
selected encoding, otherwise use a numeric character reference.

There's nothing in the spec to say it has to be this way, the rules have
simply evolved to minimize the number of user complaints.
"""