ODFPY  1.2.0
 All Classes Namespaces Files Functions Variables
element.py
Go to the documentation of this file.
1 #!/usr/bin/python
2 # -*- coding: utf-8 -*-
3 # Copyright (C) 2007-2010 Søren Roug, European Environment Agency
4 #
5 # This library is free software; you can redistribute it and/or
6 # modify it under the terms of the GNU Lesser General Public
7 # License as published by the Free Software Foundation; either
8 # version 2.1 of the License, or (at your option) any later version.
9 #
10 # This library is distributed in the hope that it will be useful,
11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 # Lesser General Public License for more details.
14 #
15 # You should have received a copy of the GNU Lesser General Public
16 # License along with this library; if not, write to the Free Software
17 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
18 #
19 # Contributor(s):
20 #
21 
22 # Note: This script has copied a lot of text from xml.dom.minidom.
23 # Whatever license applies to that file also applies to this file.
24 #
25 import sys, os.path
26 sys.path.append(os.path.dirname(__file__))
27 import xml.dom
28 from xml.dom.minicompat import *
29 from namespaces import nsdict
30 import grammar
31 from attrconverters import AttrConverters
32 
33 if sys.version_info.major == 3:
34  unicode=str # unicode function does not exist
35 
36 # The following code is pasted from xml.sax.saxutils
37 # It makes it possible to run the code without the xml sax package installed
38 # To make it possible to have <rubbish> in your text elements, it is necessary to escape the texts
39 ##
40 # Escape &, <, and > in a string of data.
41 #
42 # You can escape other strings of data by passing a dictionary as
43 # the optional entities parameter. The keys and values must all be
44 # strings; each key will be replaced with its corresponding value.
45 #
46 def _escape(data, entities={}):
47  data = data.replace("&", "&amp;")
48  data = data.replace("<", "&lt;")
49  data = data.replace(">", "&gt;")
50  for chars, entity in entities.items():
51  data = data.replace(chars, entity)
52  return data
53 
54 ##
55 # Escape and quote an attribute value.
56 #
57 # Escape &, <, and > in a string of data, then quote it for use as
58 # an attribute value. The \" character will be escaped as well, if
59 # necessary.
60 #
61 # You can escape other strings of data by passing a dictionary as
62 # the optional entities parameter. The keys and values must all be
63 # strings; each key will be replaced with its corresponding value.
64 #
def _quoteattr(data, entities={}):
    """Escape and quote an attribute value.

    Escapes ``&``, ``<`` and ``>`` plus line feed and carriage return
    (as numeric character references), then wraps the result in quotes.
    Double quotes are used unless the value itself contains a double
    quote but no single quote; if it contains both, the double quotes are
    written as ``&quot;``.

    :param data: the attribute value to quote.
    :param entities: optional mapping of extra substrings to their
        replacements. It is not modified.
    :return: the quoted attribute value, including the surrounding quotes.
    """
    # Work on a private copy: the default argument is a single shared dict
    # and a caller-supplied mapping must not be changed behind its back.
    replacements = dict(entities)
    replacements['\n'] = '&#10;'
    # Carriage return is character 13; 12 is form feed, which is not even a
    # legal character in XML 1.0.
    replacements['\r'] = '&#13;'
    data = _escape(data, replacements)
    if '"' not in data:
        return '"%s"' % data
    if "'" in data:
        return '"%s"' % data.replace('"', "&quot;")
    return "'%s'" % data
77 
78 ##
79 # Split a qualified name into namespace part and local part.
80 def _nssplit(qualifiedName):
81  fields = qualifiedName.split(':', 1)
82  if len(fields) == 2:
83  return fields
84  else:
85  return (None, fields[0])
86 
def _nsassign(namespace):
    """Return the prefix registered for *namespace* in ``nsdict``.

    A namespace that is not yet registered gets the prefix ``"ns<N>"``,
    where N is the number of entries in ``nsdict`` before the insertion.
    """
    if namespace not in nsdict:
        nsdict[namespace] = "ns" + str(len(nsdict))
    return nsdict[namespace]
89 
90 
91 # Exceptions
# NOTE(review): the listing lost the ``class IllegalChild`` line (the name is
# raised by insertBefore/appendChild/addElement). The Exception base is
# assumed by analogy with IllegalText - confirm against the upstream file.
class IllegalChild(Exception):
    """Complains if you add an element to a parent where it is not allowed."""


class IllegalText(Exception):
    """Complains if you add text or cdata to an element where it is not allowed."""
98 
99 ##
100 # super class for more specific nodes
class Node(xml.dom.Node):
    """Super class for the more specific node types.

    Implements the child-list bookkeeping (sibling and parent links).
    The methods read ``childNodes``, ``_child_node_types``, ``tagName`` and
    ``ownerDocument`` from the instance; none of them is defined here.
    """

    parentNode = None
    nextSibling = None
    previousSibling = None

    def hasChildNodes(self):
        """Tell whether this node has any children at all."""
        return bool(self.childNodes)

    def _get_childNodes(self):
        return self.childNodes

    def _get_firstChild(self):
        """Return the first child, or None when there are no children."""
        return self.childNodes[0] if self.childNodes else None

    def _get_lastChild(self):
        """Return the last child, or None when there are no children."""
        return self.childNodes[-1] if self.childNodes else None

    def insertBefore(self, newChild, refChild):
        """Insert newChild before the existing child refChild.

        If refChild is None, newChild is appended at the end instead.
        A newChild that already has a parent is removed from it first.

        :return: newChild.
        :raises IllegalChild: if newChild's node type is not accepted here.
        :raises xml.dom.NotFoundErr: if refChild is not a child of this node.
        """
        if newChild.nodeType not in self._child_node_types:
            raise IllegalChild( "%s cannot be child of %s" % (newChild.tagName, self.tagName))
        former_parent = newChild.parentNode
        if former_parent is not None:
            former_parent.removeChild(newChild)
        if refChild is None:
            self.appendChild(newChild)
            return newChild
        siblings = self.childNodes
        try:
            position = siblings.index(refChild)
        except ValueError:
            raise xml.dom.NotFoundErr()
        siblings.insert(position, newChild)
        newChild.nextSibling = refChild
        refChild.previousSibling = newChild
        if position:
            # Link up with the node that now precedes newChild.
            before = siblings[position - 1]
            before.nextSibling = newChild
            newChild.previousSibling = before
        else:
            newChild.previousSibling = None
        newChild.parentNode = self
        return newChild

    def appendChild(self, newChild):
        """Add newChild to the end of the list of children.

        A document fragment is not appended itself; each of its children
        is appended in turn. A newChild that already has a parent is
        removed from it first.

        :return: newChild.
        :raises IllegalChild: if newChild's node type is not accepted here.
        """
        if newChild.nodeType == self.DOCUMENT_FRAGMENT_NODE:
            # Iterate over a snapshot: appending detaches each child from
            # the fragment's own child list.
            for fragment_child in tuple(newChild.childNodes):
                self.appendChild(fragment_child)
            ### The DOM does not clearly specify what to return in this case
            return newChild
        if newChild.nodeType not in self._child_node_types:
            raise IllegalChild( "<%s> is not allowed in %s" % ( newChild.tagName, self.tagName))
        former_parent = newChild.parentNode
        if former_parent is not None:
            former_parent.removeChild(newChild)
        _append_child(self, newChild)
        newChild.nextSibling = None
        return newChild

    def removeChild(self, oldChild):
        """Remove oldChild from the list of children and return it.

        :raises xml.dom.NotFoundErr: if oldChild is not a child of this node.
        """
        #FIXME: update ownerDocument.element_dict or find other solution
        try:
            self.childNodes.remove(oldChild)
        except ValueError:
            raise xml.dom.NotFoundErr()
        following = oldChild.nextSibling
        preceding = oldChild.previousSibling
        # Close the gap between the two neighbours.
        if following is not None:
            following.previousSibling = preceding
        if preceding is not None:
            preceding.nextSibling = following
        oldChild.nextSibling = oldChild.previousSibling = None
        if self.ownerDocument:
            self.ownerDocument.clear_caches()
        oldChild.parentNode = None
        return oldChild

    def __str__(self):
        return ''.join(str(child) for child in self.childNodes)

    def __unicode__(self):
        return u''.join(unicode(child) for child in self.childNodes)
203 
204 defproperty(Node, "firstChild", doc="First child node, or None.")
205 defproperty(Node, "lastChild", doc="Last child node, or None.")
206 
207 def _append_child(self, node):
208  # fast path with less checks; usable by DOM builders if careful
209  childNodes = self.childNodes
210  if childNodes:
211  last = childNodes[-1]
212  node.__dict__["previousSibling"] = last
213  last.__dict__["nextSibling"] = node
214  childNodes.append(node)
215  node.__dict__["parentNode"] = self
216 
217 ##
218 # Mixin that makes childless-ness easy to implement and avoids
219 # the complexity of the Node methods that deal with children.
220 #
class Childless:
    """Mixin for nodes that never have children.

    Offers the child-related part of the node interface in a form where
    every query reports "no children" and every modification raises.
    The error messages are built from the instance's ``tagName``.
    """

    attributes = None
    childNodes = EmptyNodeList()
    firstChild = None
    lastChild = None

    def _get_firstChild(self):
        return None

    def _get_lastChild(self):
        return None

    def appendChild(self, node):
        """Always raises xml.dom.HierarchyRequestErr."""
        message = self.tagName + " nodes cannot have children"
        raise xml.dom.HierarchyRequestErr(message)

    def hasChildNodes(self):
        return False

    def insertBefore(self, newChild, refChild):
        """Always raises xml.dom.HierarchyRequestErr."""
        message = self.tagName + " nodes do not have children"
        raise xml.dom.HierarchyRequestErr(message)

    def removeChild(self, oldChild):
        """Always raises xml.dom.NotFoundErr."""
        message = self.tagName + " nodes do not have children"
        raise xml.dom.NotFoundErr(message)

    def replaceChild(self, newChild, oldChild):
        """Always raises xml.dom.HierarchyRequestErr."""
        message = self.tagName + " nodes do not have children"
        raise xml.dom.HierarchyRequestErr(message)
260 
# NOTE(review): the ``class`` line is missing from the listing. The name is
# taken from the ``Text(text)`` call in Element.addText; the bases follow the
# upstream odfpy source - confirm before merging.
class Text(Childless, Node):
    """A childless node holding a piece of character data."""

    nodeType = Node.TEXT_NODE
    tagName = "Text"

    def __init__(self, data):
        """Store *data* as the node's text."""
        self.data = data

    def __str__(self):
        return self.data

    def __unicode__(self):
        return self.data

    def toXml(self, level, f):
        """Write the escaped text to *f*; nothing is written for empty data.

        :param level: depth in the tree; not used by this node type.
        :param f: object with a ``write`` method accepting unicode strings.
        """
        if self.data:
            f.write(_escape(unicode(self.data)))
279 
# NOTE(review): the ``class`` line is missing from the listing. The name is
# taken from the ``CDATASection(cdata)`` call in Element.addCDATA; the bases
# follow the upstream odfpy source - confirm before merging.
class CDATASection(Text, Childless):
    """Character data that is written out as a CDATA section."""

    nodeType = Node.CDATA_SECTION_NODE

    def toXml(self, level, f):
        """Write the data to *f* wrapped in ``<![CDATA[ ... ]]>``.

        Nothing is written for empty data.

        :param level: depth in the tree; not used by this node type.
        :param f: object with a ``write`` method accepting unicode strings.
        """
        if self.data:
            # A literal "]]>" would terminate the section. Split it across
            # two sections ("]]" ends the first, ">" starts the next) so the
            # sequence never appears as bare character data, which XML
            # forbids.
            f.write('<![CDATA[%s]]>' % self.data.replace(']]>', ']]]]><![CDATA[>'))
291 
292 ##
293 # Creates a arbitrary element and is intended to be subclassed not used on its own.
294 # This element is the base of every element it defines a class which resembles
295 # a xml-element. The main advantage of this kind of implementation is that you don't
296 # have to create a toXML method for every different object. Every element
297 # consists of an attribute, optional subelements, optional text and optional cdata.
298 #
299 class Element(Node):
300 
301  nodeType = Node.ELEMENT_NODE
302  namespaces = {} # Due to shallow copy this is a static variable
303 
304  _child_node_types = (Node.ELEMENT_NODE,
305  Node.PROCESSING_INSTRUCTION_NODE,
306  Node.COMMENT_NODE,
307  Node.TEXT_NODE,
308  Node.CDATA_SECTION_NODE,
309  Node.ENTITY_REFERENCE_NODE)
310 
311  def __init__(self, attributes=None, text=None, cdata=None, qname=None, qattributes=None, check_grammar=True, **args):
312  if qname is not None:
313  self.qname = qname
314  assert(hasattr(self, 'qname'))
315  self.ownerDocument = None
316  self.childNodes=[]
317  self.allowed_children = grammar.allowed_children.get(self.qname)
318  prefix = self.get_nsprefix(self.qname[0])
319  self.tagName = prefix + ":" + self.qname[1]
320  if text is not None:
321  self.addText(text)
322  if cdata is not None:
323  self.addCDATA(cdata)
324 
325  allowed_attrs = self.allowed_attributes()
326  if allowed_attrs is not None:
327  allowed_args = [ a[1].lower().replace('-','') for a in allowed_attrs]
328  self.attributes={}
329  # Load the attributes from the 'attributes' argument
330  if attributes:
331  for attr, value in attributes.items():
332  self.setAttribute(attr, value)
333  # Load the qualified attributes
334  if qattributes:
335  for attr, value in qattributes.items():
336  self.setAttrNS(attr[0], attr[1], value)
337  if allowed_attrs is not None:
338  # Load the attributes from the 'args' argument
339  for arg in args.keys():
340  self.setAttribute(arg, args[arg])
341  else:
342  for arg in args.keys(): # If any attribute is allowed
343  self.attributes[arg]=args[arg]
344  if not check_grammar:
345  return
346  # Test that all mandatory attributes have been added.
347  required = grammar.required_attributes.get(self.qname)
348  if required:
349  for r in required:
350  if self.getAttrNS(r[0],r[1]) is None:
351  raise AttributeError( "Required attribute missing: %s in <%s>" % (r[1].lower().replace('-',''), self.tagName))
352 
353  ##
354  # Odfpy maintains a list of known namespaces. In some cases a prefix is used, and
355  # we need to know which namespace it resolves to.
356  #
357  def get_knownns(self, prefix):
358  global nsdict
359  for ns,p in nsdict.items():
360  if p == prefix: return ns
361  return None
362 
363  ##
364  # Odfpy maintains a list of known namespaces. In some cases we have a namespace URL,
365  # and needs to look up or assign the prefix for it.
366  #
367  def get_nsprefix(self, namespace):
368  if namespace is None: namespace = ""
369  prefix = _nsassign(namespace)
370  if not namespace in self.namespaces:
371  self.namespaces[namespace] = prefix
372  return prefix
373 
375  return grammar.allowed_attributes.get(self.qname)
376 
377  def _setOwnerDoc(self, element):
378  element.ownerDocument = self.ownerDocument
379  for child in element.childNodes:
380  self._setOwnerDoc(child)
381 
382  ##
383  # adds an element to an Element
384  #
385  # Element.addElement(Element)
386  #
387  def addElement(self, element, check_grammar=True):
388  if check_grammar and self.allowed_children is not None:
389  if element.qname not in self.allowed_children:
390  raise IllegalChild( "<%s> is not allowed in <%s>" % ( element.tagName, self.tagName))
391  self.appendChild(element)
392  self._setOwnerDoc(element)
393  if self.ownerDocument:
394  self.ownerDocument.rebuild_caches(element)
395 
396  ##
397  # Adds text to an element
398  # Setting check_grammar=False turns off grammar checking
399  #
400  def addText(self, text, check_grammar=True):
401  if check_grammar and self.qname not in grammar.allows_text:
402  raise IllegalText( "The <%s> element does not allow text" % self.tagName)
403  else:
404  if text != '':
405  self.appendChild(Text(text))
406 
407  ##
408  # Adds CDATA to an element
409  # Setting check_grammar=False turns off grammar checking
410  #
411  def addCDATA(self, cdata, check_grammar=True):
412  if check_grammar and self.qname not in grammar.allows_text:
413  raise IllegalText( "The <%s> element does not allow text" % self.tagName)
414  else:
415  self.appendChild(CDATASection(cdata))
416 
417  ##
418  # Removes an attribute by name.
419  def removeAttribute(self, attr, check_grammar=True):
420  allowed_attrs = self.allowed_attributes()
421  if allowed_attrs is None:
422  if type(attr) == type(()):
423  prefix, localname = attr
424  self.removeAttrNS(prefix, localname)
425  else:
426  raise AttributeError( "Unable to add simple attribute - use (namespace, localpart)")
427  else:
428  # Construct a list of allowed arguments
429  allowed_args = [ a[1].lower().replace('-','') for a in allowed_attrs]
430  if check_grammar and attr not in allowed_args:
431  raise AttributeError( "Attribute %s is not allowed in <%s>" % ( attr, self.tagName))
432  i = allowed_args.index(attr)
433  self.removeAttrNS(allowed_attrs[i][0], allowed_attrs[i][1])
434 
435  ##
436  # Add an attribute to the element
437  # This is sort of a convenience method. All attributes in ODF have
438  # namespaces. The library knows what attributes are legal and then allows
439  # the user to provide the attribute as a keyword argument and the
440  # library will add the correct namespace.
441  # Must overwrite, If attribute already exists.
442  #
443  def setAttribute(self, attr, value, check_grammar=True):
444  allowed_attrs = self.allowed_attributes()
445  if allowed_attrs is None:
446  if type(attr) == type(()):
447  prefix, localname = attr
448  self.setAttrNS(prefix, localname, value)
449  else:
450  raise AttributeError( "Unable to add simple attribute - use (namespace, localpart)")
451  else:
452  # Construct a list of allowed arguments
453  allowed_args = [ a[1].lower().replace('-','') for a in allowed_attrs]
454  if check_grammar and attr not in allowed_args:
455  raise AttributeError( "Attribute %s is not allowed in <%s>" % ( attr, self.tagName))
456  i = allowed_args.index(attr)
457  self.setAttrNS(allowed_attrs[i][0], allowed_attrs[i][1], value)
458 
459  ##
460  # Add an attribute to the element
461  # In case you need to add an attribute the library doesn't know about
462  # then you must provide the full qualified name
463  # It will not check that the attribute is legal according to the schema.
464  # Must overwrite, If attribute already exists.
465  #
466  def setAttrNS(self, namespace, localpart, value):
467  allowed_attrs = self.allowed_attributes()
468  prefix = self.get_nsprefix(namespace)
469 # if allowed_attrs and (namespace, localpart) not in allowed_attrs:
470 # raise AttributeError( "Attribute %s:%s is not allowed in element <%s>" % ( prefix, localpart, self.tagName))
471  c = AttrConverters()
472  self.attributes[(namespace, localpart)] = c.convert((namespace, localpart), value, self)
473 
474  ##
475  #
476  # gets an attribute, given a namespace and a key
477  # @param namespace a unicode string or a bytes: the namespace
478  # @param localpart a unicode string or a bytes:
479  # the key to get the attribute
480  # @return an attribute as a unicode string or a bytes: if both parameters
481  # are byte strings, it will be a bytes; if both attributes are
482  # unicode strings, it will be a unicode string
483  #
484  def getAttrNS(self, namespace, localpart):
485  prefix = self.get_nsprefix(namespace)
486  result = self.attributes.get((namespace, localpart))
487 
488  assert(
489  (type(namespace), type(namespace), type(namespace) == \
490  type(b""), type(b""), type(b"")) or
491  (type(namespace), type(namespace), type(namespace) == \
492  type(u""), type(u""), type(u""))
493  )
494 
495  return result
496 
497  def removeAttrNS(self, namespace, localpart):
498  del self.attributes[(namespace, localpart)]
499 
500  ##
501  # Get an attribute value. The method knows which namespace the attribute is in
502  #
503  def getAttribute(self, attr):
504  allowed_attrs = self.allowed_attributes()
505  if allowed_attrs is None:
506  if type(attr) == type(()):
507  prefix, localname = attr
508  return self.getAttrNS(prefix, localname)
509  else:
510  raise AttributeError( "Unable to get simple attribute - use (namespace, localpart)")
511  else:
512  # Construct a list of allowed arguments
513  allowed_args = [ a[1].lower().replace('-','') for a in allowed_attrs]
514  i = allowed_args.index(attr)
515  return self.getAttrNS(allowed_attrs[i][0], allowed_attrs[i][1])
516 
517  def write_open_tag(self, level, f):
518  f.write(('<'+self.tagName))
519  if level == 0:
520  for namespace, prefix in self.namespaces.items():
521  f.write(u' xmlns:' + prefix + u'="'+ _escape(str(namespace))+'"')
522  for qname in self.attributes.keys():
523  prefix = self.get_nsprefix(qname[0])
524  f.write(u' '+_escape(str(prefix+u':'+qname[1]))+u'='+_quoteattr(unicode(self.attributes[qname])))
525  f.write(u'>')
526 
527  def write_close_tag(self, level, f):
528  f.write('</'+self.tagName+'>')
529 
530  ##
531  #
532  # Generate an XML stream out of the tree structure
533  # @param level integer: level in the XML tree; zero at root of the tree
534  # @param f an open writable file able to accept unicode strings
535  #
536  def toXml(self, level, f):
537  f.write(u'<'+self.tagName)
538  if level == 0:
539  for namespace, prefix in self.namespaces.items():
540  f.write(u' xmlns:' + prefix + u'="'+ _escape(str(namespace))+u'"')
541  for qname in self.attributes.keys():
542  prefix = self.get_nsprefix(qname[0])
543  f.write(u' '+_escape(unicode(prefix+':'+qname[1]))+u'='+_quoteattr(unicode(self.attributes[qname])))
544  if self.childNodes:
545  f.write(u'>')
546  for element in self.childNodes:
547  element.toXml(level+1,f)
548  f.write(u'</'+self.tagName+'>')
549  else:
550  f.write(u'/>')
551 
552  def _getElementsByObj(self, obj, accumulator):
553  if self.qname == obj.qname:
554  accumulator.append(self)
555  for e in self.childNodes:
556  if e.nodeType == Node.ELEMENT_NODE:
557  accumulator = e._getElementsByObj(obj, accumulator)
558  return accumulator
559 
560  ##
561  # Gets elements based on the type, which is function from text.py, draw.py etc.
562  def getElementsByType(self, element):
563  obj = element(check_grammar=False)
564  return self._getElementsByObj(obj,[])
565 
566  ##
567  # This is a check to see if the object is an instance of a type
568  def isInstanceOf(self, element):
569  obj = element(check_grammar=False)
570  return self.qname == obj.qname
571 
572 
def removeChild
Raises an error.
Definition: element.py:251
def toXml
Generate XML output of the node.
Definition: element.py:288
def get_nsprefix
Odfpy maintains a list of known namespaces.
Definition: element.py:367
super class for more specific nodes
Definition: element.py:101
def toXml
Write XML in UTF-8.
Definition: element.py:276
def addElement
adds an element to an Element
Definition: element.py:387
def isInstanceOf
This is a check to see if the object is an instance of a type.
Definition: element.py:568
def getAttribute
Get an attribute value.
Definition: element.py:503
def addText
Adds text to an element Setting check_grammar=False turns off grammar checking.
Definition: element.py:400
Complains if you add text or cdata to an element where it is not allowed.
Definition: element.py:97
def replaceChild
Raises an error.
Definition: element.py:257
def getAttrNS
gets an attribute, given a namespace and a key
Definition: element.py:484
Complains if you add an element to a parent where it is not allowed.
Definition: element.py:94
def removeAttribute
Removes an attribute by name.
Definition: element.py:419
Mixin that makes childless-ness easy to implement and avoids the complexity of the Node methods that ...
Definition: element.py:221
def appendChild
Adds the node newChild to the end of the list of children of this node.
Definition: element.py:159
def setAttribute
Add an attribute to the element This is sort of a convenience method.
Definition: element.py:443
def toXml
Generate an XML stream out of the tree structure.
Definition: element.py:536
def insertBefore
Inserts the node newChild before the existing child node refChild.
Definition: element.py:131
def insertBefore
Raises an error.
Definition: element.py:245
Creates a arbitrary element and is intended to be subclassed not used on its own. ...
Definition: element.py:299
def hasChildNodes
Tells whether this element has any children; text nodes, subelements, whatever.
Definition: element.py:110
def get_knownns
Odfpy maintains a list of known namespaces.
Definition: element.py:357
def getElementsByType
Gets elements based on the type, which is function from text.py, draw.py etc.
Definition: element.py:562
def addCDATA
Adds CDATA to an element Setting check_grammar=False turns off grammar checking.
Definition: element.py:411
def removeChild
Removes the child node indicated by oldChild from the list of children, and returns it...
Definition: element.py:176
def appendChild
Raises an error.
Definition: element.py:236
dictionary namespaces
Definition: element.py:302
def setAttrNS
Add an attribute to the element In case you need to add an attribute the library doesn't know about t...
Definition: element.py:466