ODFPY  1.2.0
 All Classes Namespaces Files Functions Variables
opendocument.py
Go to the documentation of this file.
1 # -*- coding: utf-8 -*-
2 # Copyright (C) 2006-2010 Søren Roug, European Environment Agency
3 #
4 # This library is free software; you can redistribute it and/or
5 # modify it under the terms of the GNU Lesser General Public
6 # License as published by the Free Software Foundation; either
7 # version 2.1 of the License, or (at your option) any later version.
8 #
9 # This library is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 # Lesser General Public License for more details.
13 #
14 # You should have received a copy of the GNU Lesser General Public
15 # License along with this library; if not, write to the Free Software
16 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17 #
18 # Contributor(s):
19 #
20 # Copyright (C) 2014 Georges Khaznadar <georgesk@debian.org>
21 # migration to Python3, JavaDOC comments and automatic
22 # build of documentation
23 #
24 
25 __doc__="""Use OpenDocument to generate your documents."""
26 
27 import zipfile, time, sys, mimetypes, copy, os.path
28 
29 # to allow Python3 to access modules in the same path
30 sys.path.append(os.path.dirname(__file__))
31 
32 # using BytesIO provides a cleaner interface than StringIO
33 # with both Python2 and Python3: the programmer must care to
34 # convert strings or unicode to bytes, which is valid for Python 2 and 3.
35 from io import StringIO, BytesIO
36 
37 from namespaces import *
38 import manifest
39 import meta
40 from office import *
41 import element
42 from attrconverters import make_NCName
43 from xml.sax.xmlreader import InputSource
44 from odfmanifest import manifestlist
45 
# Python 3 has no ``unicode`` builtin; alias it to ``str`` so the rest of
# the module can keep calling unicode() on both major versions.
if sys.version_info[0] == 3:
    unicode = str

# TOOLSVERSION is not defined in this file; it arrives through one of the
# star-imports above.
__version__ = TOOLSVERSION

# XML declaration written at the start of every stream this module generates.
_XMLPROLOGUE = u"<?xml version='1.0' encoding='UTF-8'?>\n"
52 
53 #####
54 # file permission as an integer value.
55 # The following syntax would be invalid for Python3:
56 # UNIXPERMS = 0100644 << 16L # -rw-r--r--
57 #
58 # So it has been precomputed:
59 # 2175008768 is the same value as 0100644 << 16L == -rw-r--r--
60 ####
# Precomputed value of 0o100644 << 16 (regular file, -rw-r--r--), stored in
# ZipInfo.external_attr for every archive member written by this module.
UNIXPERMS = 2175008768

# Tags stored as the first item of each record in OpenDocument.Pictures:
# the second item is a path on disk (IS_FILENAME) or raw bytes (IS_IMAGE).
IS_FILENAME = 0
IS_IMAGE = 1
# We need at least Python 2.2.  The (major, minor) pair is compared as a
# tuple: testing the two numbers separately wrongly rejected 3.0 and 3.1.
assert sys.version_info[:2] >= (2, 2)
67 
68 #sys.setrecursionlimit(100)
69 #The recursion limit is set conservative so mistakes like
70 # s=content() s.addElement(s) won't eat up too much processor time.
71 
72 ###############
73 # mime-types => file extensions
74 ###############
# Maps each OpenDocument mime-type to its conventional file extension.
# All keys share one prefix, so only the distinguishing tail is listed.
odmimetypes = dict(
    (u'application/vnd.oasis.opendocument.' + kind, suffix)
    for kind, suffix in (
        (u'text', u'.odt'),
        (u'text-template', u'.ott'),
        (u'graphics', u'.odg'),
        (u'graphics-template', u'.otg'),
        (u'presentation', u'.odp'),
        (u'presentation-template', u'.otp'),
        (u'spreadsheet', u'.ods'),
        (u'spreadsheet-template', u'.ots'),
        (u'chart', u'.odc'),
        (u'chart-template', u'.otc'),
        (u'image', u'.odi'),
        (u'image-template', u'.oti'),
        (u'formula', u'.odf'),
        (u'formula-template', u'.otf'),
        (u'text-master', u'.odm'),
        (u'text-web', u'.oth'),
    )
)
93 
94 ##
95 #
96 # just a record to bear a filename, a mediatype and a bytes content
97 #
99  ##
100  #
101  # the constructor
102  # @param filename a unicode string
103  # @param mediatype a unicode string
104  # @param content a byte string or None
105  #
def __init__(self, filename, mediatype, content=None):
    """Store a file name, a media type and optional raw content.

    filename and mediatype must be unicode strings; content must be a
    byte string or None.  The checks are plain asserts, as in the rest
    of this module.
    """
    text_type = type(u"")
    assert type(filename) == text_type
    assert type(mediatype) == text_type
    assert type(content) == type(b"") or content == None

    self.filename = filename
    self.mediatype = mediatype
    self.content = content
114 
115 ##
116 #
117 # A class to hold the content of an OpenDocument document
118 # Use the xml method to write the XML
119 # source to the screen or to a file.
120 # Example of use: d = OpenDocument(mimetype); fd.write(d.xml())
121 #
thumbnail = None  # default: the archive writer skips the Thumbnails/ entries while this is None (presumably a class attribute; the class header is not visible here)
124 
125  ##
126  #
127  # the constructor
128  # @param mimetype a unicode string
129  # @param add_generator a boolean
130  #
def __init__(self, mimetype, add_generator=True):
    """Build an empty document tree for the given mime-type.

    mimetype is a unicode string.  When add_generator is true, a
    meta:generator element carrying TOOLSVERSION is added to the meta
    section.

    The top node receives, in this order: meta, scripts, font-face
    declarations, settings, styles, automatic styles, master styles
    and body.  Each section is also kept as an attribute of the same
    name so the serialisers can reach it directly.
    """
    assert(type(mimetype)==type(u""))
    assert(isinstance(add_generator,True.__class__))

    self.mimetype = mimetype
    self.childobjects = []
    self._extra = []
    self.folder = u"" # Always empty for toplevel documents
    self.topnode = Document(mimetype=self.mimetype)
    self.topnode.ownerDocument = self

    self.clear_caches()

    self.Pictures = {}
    self.meta = Meta()
    self.topnode.addElement(self.meta)
    if add_generator:
        self.meta.addElement(meta.Generator(text=TOOLSVERSION))
    self.scripts = Scripts()
    self.topnode.addElement(self.scripts)
    # The next four sections were attached to the tree without ever being
    # created; each attribute is assigned before it is used.
    # NOTE(review): FontFaceDecls, Settings and MasterStyles are expected
    # to come from the office star-import -- confirm.
    self.fontfacedecls = FontFaceDecls()
    self.topnode.addElement(self.fontfacedecls)
    self.settings = Settings()
    self.topnode.addElement(self.settings)
    self.styles = Styles()
    self.topnode.addElement(self.styles)
    self.automaticstyles = AutomaticStyles()
    self.topnode.addElement(self.automaticstyles)
    self.masterstyles = MasterStyles()
    self.topnode.addElement(self.masterstyles)
    self.body = Body()
    self.topnode.addElement(self.body)
163 
def rebuild_caches(self, node=None):
    """Register node and every element below it in the caches.

    With no argument the walk starts at the document's top node.
    Non-element children are skipped.
    """
    start = self.topnode if node is None else node
    self.build_caches(start)
    for child in start.childNodes:
        if child.nodeType != element.Node.ELEMENT_NODE:
            continue
        self.rebuild_caches(child)
170 
171  ##
172  #
173  # Clears internal caches
174  #
def clear_caches(self):
    """Reset the element, style and renamed-style lookup tables."""
    for cache_name in ("element_dict", "_styles_dict", "_styles_ooo_fix"):
        # a fresh dict per attribute, so the three caches never alias
        setattr(self, cache_name, {})
179 
180  ##
181  #
182  # Builds internal caches; called from element.py
183  # @param elt an element.Element instance
184  #
def build_caches(self, elt):
    """Record one element in the lookup caches.

    The element is appended to the list kept under its qname.  A
    style:style element is also passed to the style-name registry.
    If the element's text:style-name refers to a style that was
    renamed, the reference is rewritten to the new name.
    """
    # The element class can show up under two module paths (element and
    # odf.element), so both are accepted.
    import odf.element
    assert isinstance(elt, (element.Element, odf.element.Element))

    self.element_dict.setdefault(elt.qname, []).append(elt)
    if elt.qname == (STYLENS, u'style'):
        self.__register_stylename(elt)

    referenced = elt.getAttrNS(TEXTNS, u'style-name')
    if referenced is None:
        return
    if referenced in self._styles_ooo_fix:
        elt.setAttrNS(TEXTNS, u'style-name', self._styles_ooo_fix[referenced])
201 
202  ##
203  #
204  # Register a style. But there are three style dictionaries:
205  # office:styles, office:automatic-styles and office:master-styles
206  # Chapter 14.
207  # @param elt an element.Element instance
208  #
def __register_stylename(self, elt):
    """Add a named style to the style dictionary.

    Only styles whose parent is office:styles or office:automatic-styles
    are recorded.  When the name is already taken, the newcomer is
    renamed with an 'M' prefix and the old-to-new mapping is remembered
    so later references can be redirected.
    """
    assert isinstance(elt, element.Element)

    name = elt.getAttrNS(STYLENS, u'name')
    if name is None:
        return
    tracked_parents = ((OFFICENS, u'styles'), (OFFICENS, u'automatic-styles'))
    if elt.parentNode.qname not in tracked_parents:
        return
    if name in self._styles_dict:
        renamed = u'M' + name
        self._styles_ooo_fix[name] = renamed
        # from here on the element is known under its new name
        name = renamed
        elt.setAttrNS(STYLENS, u'name', name)
    self._styles_dict[name] = elt
223 
224  ##
225  #
226  # converts the document to a valid Xml format.
227  # @param filename unicode string: the name of a file, defaults to
228  # an empty string.
229  # @return if filename is not empty, the XML code will be written into it
230  # and the method returns None; otherwise the method returns a unicode
231  # string containing valid XML (the value of the internal StringIO
232  # buffer).
233  #
def toXml(self, filename=u''):
    """Serialise the document body as XML.

    filename is a unicode string.  When it is empty the XML is returned
    as a unicode string; otherwise the XML is written to that file,
    encoded as UTF-8, and None is returned.
    """
    # Local import: the module only binds StringIO/BytesIO from io, and the
    # codecs module the old code relied on was never imported.
    import io
    assert(type(filename)==type(u""))

    xml = StringIO()
    xml.write(_XMLPROLOGUE)
    self.body.toXml(0, xml)
    if not filename:
        return xml.getvalue()
    # the with-block closes the file even if the write fails
    with io.open(filename, 'w', encoding='utf-8') as f:
        f.write(xml.getvalue())
    return None
251 
252  ##
253  #
254  # Generates the full document as an XML "file"
255  # @return a bytestream in UTF-8 encoding
256  #
def xml(self):
    """Return the whole document tree as UTF-8 encoded XML bytes.

    The generator element of the meta section is refreshed first.
    """
    self.__replaceGenerator()
    buf = StringIO()
    # both Python-version branches wrote the same prologue
    buf.write(_XMLPROLOGUE)
    self.topnode.toXml(0, buf)
    text = buf.getvalue()
    return text.encode("utf-8")
266 
267 
268  ##
269  #
270  # Generates the content.xml file
271  # @return a bytestream in UTF-8 encoding
272  #
def contentxml(self):
    """Return the content.xml part as UTF-8 encoded bytes.

    Scripts and font-face declarations are written only when they have
    children.  The automatic-styles section lists just the automatic
    styles referenced from the styles, automatic-styles or body
    sections; if none is referenced, an empty element is written.
    """
    out = StringIO()
    out.write(_XMLPROLOGUE)
    root = DocumentContent()
    root.write_open_tag(0, out)
    for optional in (self.scripts, self.fontfacedecls):
        if optional.hasChildNodes():
            optional.toXml(1, out)
    auto = AutomaticStyles()
    used = self._used_auto_styles([self.styles, self.automaticstyles, self.body])
    if len(used) > 0:
        auto.write_open_tag(1, out)
        for style in used:
            style.toXml(2, out)
        auto.write_close_tag(1, out)
    else:
        auto.toXml(1, out)
    self.body.toXml(1, out)
    root.write_close_tag(0, out)
    return out.getvalue().encode("utf-8")
294 
295  ##
296  #
297  # Generates the manifest.xml file;
298  # The self.manifest isn't available unless the document is being saved
299  # @return a unicode string
300  #
def __manifestxml(self):
    """Return manifest.xml as a unicode string.

    self.manifest only exists while the document is being saved.
    """
    buf = StringIO()
    buf.write(_XMLPROLOGUE)
    self.manifest.toXml(0, buf)
    text = buf.getvalue()
    assert type(text) == type(u"")
    return text
308 
309  ##
310  #
311  # Generates the meta.xml file
312  # @return a unicode string
313  #
def metaxml(self):
    """Return meta.xml as a unicode string.

    The generator element is refreshed first, then the meta section is
    wrapped in a new DocumentMeta element and serialised.
    """
    self.__replaceGenerator()
    wrapper = DocumentMeta()
    wrapper.addElement(self.meta)
    buf = StringIO()
    buf.write(_XMLPROLOGUE)
    wrapper.toXml(0, buf)
    text = buf.getvalue()
    assert type(text) == type(u"")
    return text
324 
325  ##
326  #
327  # Generates the settings.xml file
328  # @return a unicode string
329  #
def settingsxml(self):
    """Return settings.xml as a unicode string.

    The settings section is wrapped in a new DocumentSettings element
    and serialised after the XML prologue.
    """
    wrapper = DocumentSettings()
    wrapper.addElement(self.settings)
    buf = StringIO()
    # both Python-version branches wrote the same prologue
    buf.write(_XMLPROLOGUE)
    wrapper.toXml(0, buf)
    text = buf.getvalue()
    assert type(text) == type(u"")
    return text
342 
343  ##
344  #
345  # Finds references to style objects in master-styles
346  # and add the style name to the style list if not already there.
347  # Recursive
348  # @return the list of style names as unicode strings
349  #
def _parseoneelement(self, top, stylenamelist):
    """Collect the style names referenced below top.

    Each element child is checked for the known style-reference
    attributes.  Names not yet in stylenamelist are appended as unicode
    strings, then the child is searched recursively.  The list is
    modified in place and also returned.
    """
    for child in top.childNodes:
        if child.nodeType != element.Node.ELEMENT_NODE:
            continue
        style_attributes = (
            (CHARTNS, u'style-name'),
            (DRAWNS, u'style-name'),
            (DRAWNS, u'text-style-name'),
            (PRESENTATIONNS, u'style-name'),
            (STYLENS, u'data-style-name'),
            (STYLENS, u'list-style-name'),
            (STYLENS, u'page-layout-name'),
            (STYLENS, u'style-name'),
            (TABLENS, u'default-cell-style-name'),
            (TABLENS, u'style-name'),
            (TEXTNS, u'style-name'),
        )
        for namespace, localname in style_attributes:
            if child.getAttrNS(namespace, localname):
                stylename = child.getAttrNS(namespace, localname)
                if stylename not in stylenamelist:
                    # getAttrNS() may return several types, so the
                    # stored value is forced to unicode
                    stylenamelist.append(unicode(stylename))
        stylenamelist = self._parseoneelement(child, stylenamelist)
    return stylenamelist
373 
374  ##
375  #
376  # Loop through the masterstyles elements, and find the automatic
377  # styles that are used. These will be added to the automatic-styles
378  # element in styles.xml
379  # @return a list of element.Element instances
380  #
def _used_auto_styles(self, segments):
    """Return the automatic styles referenced from the given segments.

    The style names referenced anywhere below each segment are gathered
    first.  The result holds the children of the automatic-styles
    section whose style:name is among them, in document order.
    """
    referenced = []
    for segment in segments:
        referenced = self._parseoneelement(segment, referenced)

    used = [style for style in self.automaticstyles.childNodes
            if style.getAttrNS(STYLENS, u'name') in referenced]

    # every returned item must be an element
    assert all(isinstance(style, element.Element) for style in used)

    return used
396 
397  ##
398  #
399  # Generates the styles.xml file
400  # @return valid XML code as a unicode string
401  #
def stylesxml(self):
    """Return styles.xml as a unicode string.

    Font-face declarations and master styles are written only when they
    have children.  The automatic-styles section always appears and
    holds the automatic styles referenced from the master styles.
    """
    out = StringIO()
    out.write(_XMLPROLOGUE)
    root = DocumentStyles()
    root.write_open_tag(0, out)
    if self.fontfacedecls.hasChildNodes():
        self.fontfacedecls.toXml(1, out)
    self.styles.toXml(1, out)

    auto = AutomaticStyles()
    auto.write_open_tag(1, out)
    for style in self._used_auto_styles([self.masterstyles]):
        style.toXml(2, out)
    auto.write_close_tag(1, out)

    if self.masterstyles.hasChildNodes():
        self.masterstyles.toXml(1, out)
    root.write_close_tag(0, out)

    text = out.getvalue()
    assert type(text) == type(u"")
    return text
423 
424  ##
425  #
426  # Add a picture
427  # It uses the same convention as OOo, in that it saves the picture in
428  # the zipfile in the subdirectory 'Pictures'
429  # If passed a file ptr, mediatype must be set
430  # @param filename unicode string: name of a file for Pictures
431  # @param mediatype unicode string: name of a media, None by default
432  # @param content bytes: content of media, None by default
433  # @return a unicode string: the file name of the media, eventually
434  # created on the fly
435  #
def addPicture(self, filename, mediatype=None, content=None):
    """Register a picture and return its name inside the archive.

    Without content, filename is a path on disk.  The media type is
    guessed from it when not given, and a time-based name under
    'Pictures/' is generated.  With content (bytes), filename is used
    unchanged as the archive name and mediatype must be given.
    """
    if content is None:
        if mediatype is None:
            mediatype, encoding = mimetypes.guess_type(filename)
        if mediatype is None:
            mediatype = u''
            # rindex() raises ValueError when there is no dot; only that
            # case means "no extension"
            try:
                ext = filename[filename.rindex(u'.'):]
            except ValueError:
                ext = u''
        else:
            # guess_extension() returns None for types it does not know;
            # fall back to no extension instead of a name ending in "None"
            ext = mimetypes.guess_extension(mediatype) or u''
        manifestfn = u"Pictures/%0.0f%s" % ((time.time()*10000000000), ext)
        self.Pictures[manifestfn] = (IS_FILENAME, filename, mediatype)
        content = b""   # only used by the asserts below
        filename = u""  # only used by the asserts below
    else:
        manifestfn = filename
        self.Pictures[manifestfn] = (IS_IMAGE, content, mediatype)

    assert(type(filename)==type(u""))
    assert(type(mediatype)==type(u""))
    assert(type(content) == type(b""))

    return manifestfn
459 
460  ##
461  #
462  # Add a picture
463  # It uses the same convention as OOo, in that it saves the picture in
464  # the zipfile in the subdirectory 'Pictures'.
465  # If mediatype is not given, it will be guessed from the filename
466  # extension.
467  # @param filename unicode string: name of an image file
468  # @param mediatype unicode string: type of media, defaults to None
469  # @return a unicode string, the name of the created file
470  #
def addPictureFromFile(self, filename, mediatype=None):
    """Register a picture stored on disk and return its archive name.

    The media type is guessed from filename when not given.  The
    archive name is a time-based name under 'Pictures/' carrying the
    extension that matches the media type, or the file's own extension
    when the type cannot be guessed.
    """
    if mediatype is None:
        mediatype, encoding = mimetypes.guess_type(filename)
    if mediatype is None:
        mediatype = u''
        try:
            ext = filename[filename.rindex(u'.'):]
        except ValueError:
            ext = u''
    else:
        # guess_extension() returns None for types it does not know;
        # fall back to no extension instead of a name ending in "None"
        ext = mimetypes.guess_extension(mediatype) or u''
    manifestfn = u"Pictures/%0.0f%s" % ((time.time()*10000000000), ext)
    self.Pictures[manifestfn] = (IS_FILENAME, filename, mediatype)

    assert(type(filename)==type(u""))
    assert(type(mediatype)==type(u""))

    return manifestfn
487 
488  ##
489  #
490  # Add a picture from contents given as a Byte string.
491  # It uses the same convention as OOo, in that it saves the picture in
492  # the zipfile in the subdirectory 'Pictures'. The content variable
493  # is a string that contains the binary image data. The mediatype
494  # indicates the image format.
495  # @param content bytes: content of media
496  # @param mediatype unicode string: name of a media
497  # @return a unicode string, the name of the created file
498  #
def addPictureFromString(self, content, mediatype):
    """Register in-memory picture data and return its archive name.

    content is the binary image data (bytes) and mediatype its
    mime-type (unicode).  The archive name is a time-based name under
    'Pictures/'.
    """
    assert(type(content)==type(b""))
    assert(type(mediatype)==type(u""))

    # guess_extension() returns None for types it does not know; fall
    # back to no extension instead of a name ending in "None"
    ext = mimetypes.guess_extension(mediatype) or u''
    manifestfn = u"Pictures/%0.0f%s" % ((time.time()*10000000000), ext)
    self.Pictures[manifestfn] = (IS_IMAGE, content, mediatype)
    return manifestfn
507 
508  ##
509  #
510  # Add a fixed thumbnail
511  # The thumbnail in the library is big, so this is pretty useless.
512  # @param filecontent bytes: the content of a file; defaults to None
513  #
def addThumbnail(self, filecontent=None):
    """Set the thumbnail written to Thumbnails/thumbnail.png on save.

    filecontent is the thumbnail as bytes.  When it is None (the
    default), the thumbnail bundled in the 'thumbnail' module is used.
    """
    # None is the documented default, so the type check must accept it;
    # otherwise the bundled-thumbnail branch can never run.
    assert(filecontent is None or type(filecontent)==type(b""))

    if filecontent is None:
        import thumbnail
        self.thumbnail = thumbnail.thumbnail()
    else:
        self.thumbnail = filecontent
522 
523  ##
524  #
525  # Adds an object (subdocument). The object must be an OpenDocument class
526  # @param document OpenDocument instance
527  # @param objectname unicode string: the name of an object to add
528  # @return a unicode string: the folder name in the zipfile the object is
529  # stored in.
530  #
def addObject(self, document, objectname=None):
    """Attach a sub-document and return a reference to its folder.

    document must be an OpenDocument.  Its folder becomes objectname
    when given, otherwise '<this folder>/Object <n>' where n is its
    position among the child objects.  The returned string is that
    folder prefixed with a dot.
    """
    assert isinstance(document, OpenDocument)
    assert type(objectname) == type(u"") or objectname == None

    self.childobjects.append(document)
    if objectname is not None:
        document.folder = objectname
    else:
        position = len(self.childobjects)
        document.folder = u"%s/Object %d" % (self.folder, position)
    return u".%s" % document.folder
541 
542  ##
543  #
544  # saves pictures contained in an object
545  # @param anObject instance of OpenDocument containing pictures
546  # @param folder unicode string: place to save pictures
547  #
def _savePictures(self, anObject, folder):
    """Write the pictures of anObject and of its sub-objects.

    anObject is an OpenDocument.  folder is the unicode path prefix of
    that object inside the archive (empty for the top-level document).
    Each picture gets a manifest entry and a stored (uncompressed)
    archive member.
    """
    assert(isinstance(anObject, OpenDocument))
    assert(type(folder)==type(u""))

    for arcname, picturerec in anObject.Pictures.items():
        what_it_is, fileobj, mediatype = picturerec
        # One path for both the manifest entry and the archive member.
        # The member used to be written without the folder prefix, so
        # the manifest of a sub-object pointed at a file that was not
        # there.
        fullpath = u"%s%s" % (folder, arcname)
        self.manifest.addElement(manifest.FileEntry(fullpath=fullpath, mediatype=mediatype))
        if what_it_is == IS_FILENAME:
            self._z.write(fileobj, fullpath, zipfile.ZIP_STORED)
        else:
            zi = zipfile.ZipInfo(str(fullpath), self._now)
            zi.compress_type = zipfile.ZIP_STORED
            zi.external_attr = UNIXPERMS
            self._z.writestr(zi, fileobj)
    # According to section 17.7.3 in ODF 1.1, the pictures folder should
    # not have a manifest entry of its own.

    # Look in subobjects
    for subobjectnum, subobject in enumerate(anObject.childobjects, 1):
        self._savePictures(subobject, u'%sObject %d/' % (folder, subobjectnum))
572 
573  ##
574  #
575  # Removes a previous 'generator' stance and declares TOOLSVERSION
576  # as the new generator.
577  # Section 3.1.1: The application MUST NOT export the original identifier
578  # belonging to the application that created the document.
579  #
def __replaceGenerator(self):
    """Replace any meta:generator element with one naming TOOLSVERSION."""
    stale = [node for node in self.meta.childNodes
             if node.qname == (METANS, u'generator')]
    for node in stale:
        self.meta.removeChild(node)
    self.meta.addElement(meta.Generator(text=TOOLSVERSION))
585 
586  ##
587  #
588  # Save the document under the filename.
589  # If the filename is '-' then save to stdout
590  # @param outputfile unicode string: the special name '-' is for stdout;
591  # as an alternative, it can be an io.BytesIO instance which contains
592  # the ZIP content.
593  # @param addsuffix boolean: whether to add a suffix or not; defaults to False
594  #
def save(self, outputfile, addsuffix=False):
    """Save the document as a ZIP archive.

    outputfile is a unicode file name, the special name '-' for
    standard output, or a writable binary stream such as io.BytesIO.
    When addsuffix is true, the extension matching the document's
    mime-type ('.xxx' if unknown) is appended to the file name.
    """
    assert(type(outputfile)==type(u"") or 'wb' in repr(outputfile) or 'BufferedWriter' in repr(outputfile) or 'BytesIO' in repr(outputfile))
    assert(type(addsuffix)==type(True))

    if outputfile == u'-':
        # ZIP data is binary: on Python 3 the byte stream behind stdout
        # is sys.stdout.buffer; Python 2's sys.stdout has no such attribute
        target = getattr(sys.stdout, "buffer", sys.stdout)
    elif addsuffix:
        target = outputfile + odmimetypes.get(self.mimetype,u'.xxx')
    else:
        target = outputfile
    outputfp = zipfile.ZipFile(target, "w")
    try:
        self.__zipwrite(outputfp)
    finally:
        # always close, so the archive handle is released on failure too
        outputfp.close()
607 
608  ##
609  #
610  # User API to write the ODF file to an open file descriptor
611  # Writes the ZIP format
612  # @param outputfp open file descriptor
613  #
def write(self, outputfp):
    """Write the document as a ZIP archive to an open binary stream.

    outputfp is a writable binary stream (a file opened in 'wb' mode or
    an io.BytesIO).  The stream itself is left open for the caller.
    """
    assert('wb' in repr(outputfp) or 'BufferedWriter' in repr(outputfp) or 'BytesIO' in repr(outputfp))

    zipoutputfp = zipfile.ZipFile(outputfp,"w")
    try:
        self.__zipwrite(zipoutputfp)
    finally:
        # close() writes the central directory; without it the archive
        # is only completed when the ZipFile happens to be collected
        zipoutputfp.close()
619 
620  ##
621  #
622  # Write the document to an open file pointer
623  # This is where the real work is done
624  # @param outputfp instance of zipfile.ZipFile
625  #
def __zipwrite(self, outputfp):
    """Write every part of the document into an open ZipFile.

    Order of members: mimetype (stored), the XML parts, the pictures,
    the optional thumbnail, any extra files (document signatures are
    skipped) and finally the manifest.  The temporary attributes _z,
    _now and manifest exist only during the call.
    """
    assert isinstance(outputfp, zipfile.ZipFile)

    self._z = outputfp
    self._now = time.localtime()[:6]
    self.manifest = manifest.Manifest()

    # Write mimetype
    mimetype_info = zipfile.ZipInfo('mimetype', self._now)
    mimetype_info.compress_type = zipfile.ZIP_STORED
    mimetype_info.external_attr = UNIXPERMS
    self._z.writestr(mimetype_info, self.mimetype.encode("utf-8"))

    self._saveXmlObjects(self, u"")

    # Write pictures
    self._savePictures(self, u"")

    # Write the thumbnail
    if self.thumbnail is not None:
        for path in (u"Thumbnails/", u"Thumbnails/thumbnail.png"):
            self.manifest.addElement(manifest.FileEntry(fullpath=path, mediatype=u''))
        thumb_info = zipfile.ZipInfo(u"Thumbnails/thumbnail.png", self._now)
        thumb_info.compress_type = zipfile.ZIP_DEFLATED
        thumb_info.external_attr = UNIXPERMS
        self._z.writestr(thumb_info, self.thumbnail)

    # Write any extra files
    for extra in self._extra:
        if extra.filename == u"META-INF/documentsignatures.xml":
            continue  # Don't save signatures
        self.manifest.addElement(manifest.FileEntry(fullpath=extra.filename, mediatype=extra.mediatype))
        if sys.version_info.major == 3:
            member_name = extra.filename
        else:
            member_name = extra.filename.encode('utf-8')
        extra_info = zipfile.ZipInfo(member_name, self._now)
        extra_info.compress_type = zipfile.ZIP_DEFLATED
        extra_info.external_attr = UNIXPERMS
        if extra.content is not None:
            self._z.writestr(extra_info, extra.content)

    # Write manifest
    manifest_info = zipfile.ZipInfo(u"META-INF/manifest.xml", self._now)
    manifest_info.compress_type = zipfile.ZIP_DEFLATED
    manifest_info.external_attr = UNIXPERMS
    self._z.writestr(manifest_info, self.__manifestxml())

    del self._z
    del self._now
    del self.manifest
673 
674 
675  ##
676  #
677  # save xml objects of an opendocument to some folder
678  # @param anObject instance of OpenDocument
679  # @param folder unicode string place to save xml objects
680  #
def _saveXmlObjects(self, anObject, folder):
    """Write the XML parts of anObject and of its sub-objects.

    anObject is an OpenDocument.  folder is the unicode path prefix of
    that object inside the archive.  styles.xml and content.xml are
    always written, settings.xml only when the settings section has
    children, and meta.xml only for the top-level document.
    """
    assert isinstance(anObject, OpenDocument)
    assert type(folder) == type(u"")

    def store(path, render):
        # Manifest entry first, then the deflated member.  The payload is
        # rendered last, in the same order as the straight-line code.
        self.manifest.addElement(manifest.FileEntry(fullpath=path, mediatype=u"text/xml"))
        info = zipfile.ZipInfo(path, self._now)
        info.compress_type = zipfile.ZIP_DEFLATED
        info.external_attr = UNIXPERMS
        self._z.writestr(info, render())

    # the top-level document is registered under "/", a sub-object under
    # its folder
    if self == anObject:
        self.manifest.addElement(manifest.FileEntry(fullpath=u"/", mediatype=anObject.mimetype))
    else:
        self.manifest.addElement(manifest.FileEntry(fullpath=folder, mediatype=anObject.mimetype))

    # Write styles (stylesxml() returns text, so it is encoded here)
    store(u"%sstyles.xml" % folder, lambda: anObject.stylesxml().encode("utf-8"))

    # Write content
    store(u"%scontent.xml" % folder, anObject.contentxml)

    # Write settings
    if anObject.settings.hasChildNodes():
        store(u"%ssettings.xml" % folder, anObject.settingsxml)

    # Write meta
    if self == anObject:
        store(u"meta.xml", anObject.metaxml)

    # Write subobjects
    for number, subobject in enumerate(anObject.childobjects, 1):
        self._saveXmlObjects(subobject, u'%sObject %d/' % (folder, number))
724 
725 # Document's DOM methods
726  ##
727  #
728  # Inconvenient interface to create an element, but follows XML-DOM.
729  # Does not allow attributes as argument, therefore can't check grammar.
730  # @param elt element.Element instance
731  # @return an element.Element instance whose grammar is not checked
732  #
def createElement(self, elt):
    """Create an element without grammar checking (XML-DOM style).

    elt is an element factory: a callable accepting check_grammar, like
    the functions accepted by getElementsByType().  It is called with
    check_grammar=False and its result is returned.
    """
    # The old assert demanded an Element *instance*, which cannot be
    # called on the next line; what is needed is a callable factory.
    assert callable(elt)

    return elt(check_grammar=False)
741 
742  ##
743  #
744  # Method to create a text node
745  # @param data unicode string to include in the Text element
746  # @return an instance of element.Text
747  #
def createTextNode(self, data):
    """Return a new element.Text node holding the unicode string data."""
    assert type(data) == type(u"")

    text_node = element.Text(data)
    return text_node
752 
753  ##
754  #
755  # Method to create a CDATA section
756  # @param data unicode string to include in the CDATA element
757  # @return an instance of element.CDATASection
758  #
def createCDATASection(self, data):
    """Return a new element.CDATASection holding the unicode string data."""
    assert(type(data)==type(u""))

    # the parameter is named 'data'; the old code passed the undefined
    # name 'cdata' and raised NameError on every call
    return element.CDATASection(data)
763 
764  ##
765  #
766  # Returns the media type
767  # @result a unicode string
768  #
def getMediaType(self):
    """Return the document's mime-type, a unicode string."""
    media_type = self.mimetype
    assert type(media_type) == type(u"")

    return media_type
773 
774  ##
775  #
776  # Finds a style object based on the name
777  # @param name unicode string the name of style to search
778  # @return a style as an element.Element instance
779  #
def getStyleByName(self, name):
    """Find a style by name.

    name is a unicode string; it is converted with make_NCName() before
    the lookup.  The style cache is rebuilt first if it is empty.
    Returns the matching element.Element, or None when no style has
    that name.
    """
    assert(type(name)==type(u""))

    ncname = make_NCName(name)
    if self._styles_dict == {}:
        self.rebuild_caches()
    result = self._styles_dict.get(ncname, None)

    # A missing style is a normal outcome (hence the None default above);
    # only a found value has to be an element.
    assert(result is None or isinstance(result, element.Element))
    return result
790 
791  ##
792  #
793  # Gets elements based on the type, which is function from
794  # text.py, draw.py etc.
795  # @param elt instance of a function which returns an element.Element
796  # @return a list of instances of element.Element
797  #
def getElementsByType(self, elt):
    """Return the cached elements of the type that elt creates.

    elt is one of the element factory functions (from text.py, draw.py
    and so on).  It is called once, without grammar checking, only to
    learn the qname to look up.  The element cache is rebuilt first if
    it is empty.
    """
    import types
    assert isinstance(elt, types.FunctionType)

    probe = elt(check_grammar=False)
    assert isinstance(probe, element.Element)

    if self.element_dict == {}:
        self.rebuild_caches()

    matches = self.element_dict.get(probe.qname, [])

    # every returned item must be an element
    assert all(isinstance(match, element.Element) for match in matches)

    return matches
820 
821 # Convenience functions
822 ##
823 #
824 # Creates a chart document
825 # @return an OpenDocument instance with chart mimetype
826 #
# NOTE(review): the 'def' header was missing from this listing, leaving a
# bare 'return' at module level; the name follows the odfpy public API.
def OpenDocumentChart():
    """Create a chart document with an empty Chart element in its body."""
    doc = OpenDocument(u'application/vnd.oasis.opendocument.chart')
    doc.chart = Chart()
    doc.body.addElement(doc.chart)
    return doc
832 
833 ##
834 #
835 # Creates a drawing document
836 # @return an OpenDocument instance with drawing mimetype
837 #
# NOTE(review): the 'def' header was missing from this listing, leaving a
# bare 'return' at module level; the name follows the odfpy public API.
def OpenDocumentDrawing():
    """Create a drawing document with an empty Drawing element in its body."""
    doc = OpenDocument(u'application/vnd.oasis.opendocument.graphics')
    doc.drawing = Drawing()
    doc.body.addElement(doc.drawing)
    return doc
843 
844 ##
845 #
846 # Creates an image document
847 # @return an OpenDocument instance with image mimetype
848 #
# NOTE(review): the 'def' header was missing from this listing, leaving a
# bare 'return' at module level; the name follows the odfpy public API.
def OpenDocumentImage():
    """Create an image document with an empty Image element in its body."""
    doc = OpenDocument(u'application/vnd.oasis.opendocument.image')
    doc.image = Image()
    doc.body.addElement(doc.image)
    return doc
854 
855 ##
856 #
857 # Creates a presentation document
858 # @return an OpenDocument instance with presentation mimetype
859 #
# NOTE(review): the 'def' header was missing from this listing, leaving a
# bare 'return' at module level; the name follows the odfpy public API.
def OpenDocumentPresentation():
    """Create a presentation document with an empty Presentation element in its body."""
    doc = OpenDocument(u'application/vnd.oasis.opendocument.presentation')
    doc.presentation = Presentation()
    doc.body.addElement(doc.presentation)
    return doc
865 
866 ##
867 #
868 # Creates a spreadsheet document
869 # @return an OpenDocument instance with spreadsheet mimetype
870 #
# NOTE(review): the 'def' header was missing from this listing, leaving a
# bare 'return' at module level; the name follows the odfpy public API.
def OpenDocumentSpreadsheet():
    """Create a spreadsheet document with an empty Spreadsheet element in its body."""
    doc = OpenDocument(u'application/vnd.oasis.opendocument.spreadsheet')
    doc.spreadsheet = Spreadsheet()
    doc.body.addElement(doc.spreadsheet)
    return doc
876 
877 ##
878 #
879 # Creates a text document
880 # @return an OpenDocument instance with text mimetype
881 #
# NOTE(review): the 'def' header was missing from this listing, leaving a
# bare 'return' at module level; the name follows the odfpy public API.
def OpenDocumentText():
    """Create a text document with an empty Text element in its body."""
    doc = OpenDocument(u'application/vnd.oasis.opendocument.text')
    doc.text = Text()
    doc.body.addElement(doc.text)
    return doc
887 
888 ##
889 #
890 # Creates a text master document
891 # @return an OpenDocument instance with master mimetype
892 #
# NOTE(review): the 'def' header was missing from this listing, leaving a
# bare 'return' at module level; the name follows the odfpy public API.
def OpenDocumentTextMaster():
    """Create a text master document with an empty Text element in its body."""
    doc = OpenDocument(u'application/vnd.oasis.opendocument.text-master')
    doc.text = Text()
    doc.body.addElement(doc.text)
    return doc
898 
899 ##
900 #
901 # Parses a document from its zipfile
902 # @param z an instance of zipfile.ZipFile
903 # @param manifest Manifest data structured in a dictionary
904 # @param doc instance of OpenDocument to feed in
905 # @param objectpath unicode string: path to an object
906 #
def __loadxmlparts(z, manifest, doc, objectpath):
    """Parse the XML parts of one (sub)document into doc.

    z is the open ZipFile, manifest a dict keyed by archive path, doc
    the OpenDocument to fill and objectpath the unicode path prefix of
    the object.  settings.xml, meta.xml, content.xml and styles.xml are
    read in that order; parts not listed in the manifest are skipped.
    A part that fails to parse is printed and the loop moves on.
    """
    assert isinstance(z, zipfile.ZipFile)
    assert type(manifest) == type(dict())
    assert isinstance(doc, OpenDocument)
    assert type(objectpath) == type(u"")

    from load import LoadParser
    from xml.sax import make_parser, handler

    part_names = (u'settings.xml', u'meta.xml', u'content.xml', u'styles.xml')
    for xmlfile in [objectpath + part_name for part_name in part_names]:
        if xmlfile not in manifest:
            continue
        # imported to report the SAXParseException seen with Python 2
        from xml.sax._exceptions import SAXParseException
        try:
            xmlpart = z.read(xmlfile).decode("utf-8")
            doc._parsing = xmlfile

            parser = make_parser()
            parser.setFeature(handler.feature_namespaces, 1)
            parser.setContentHandler(LoadParser(doc))
            parser.setErrorHandler(handler.ErrorHandler())

            # A part may use prefixes (meta, config, ...) it never
            # declares, which makes SAX fail; missing declarations are
            # added before parsing (GK, 2014/10/21).
            xmlpart = __fixXmlPart(xmlpart)

            source = InputSource()
            source.setByteStream(BytesIO(xmlpart.encode("utf-8")))
            parser.parse(source)
            del doc._parsing
        except KeyError:
            pass
        except SAXParseException:
            print (u"====== SAX FAILED TO PARSE ==========\n", xmlpart)
947 
##
#
# fixes an xml code when it does not contain a set of requested
# "xmlns:whatever" declarations.
# added by G.K. on 2014/10/21
# Each missing declaration is inserted in front of the first existing
# "xmlns:" declaration or, when there is none, right after the name of
# the first element. A text in which no element can be found is returned
# unchanged.
# @param xmlpart unicode string: some XML code
# @return fixed XML code
#
def __fixXmlPart(xmlpart):
    # local import: the module-level import list does not provide re
    import re

    # prefix -> namespace URI; dc, svg and fo do not follow the
    # "urn:oasis:names:tc:opendocument:xmlns:<prefix>:1.0" pattern in ODF
    requestedNamespaces = (
        (u'meta', u'urn:oasis:names:tc:opendocument:xmlns:meta:1.0'),
        (u'config', u'urn:oasis:names:tc:opendocument:xmlns:config:1.0'),
        (u'dc', u'http://purl.org/dc/elements/1.1/'),
        (u'style', u'urn:oasis:names:tc:opendocument:xmlns:style:1.0'),
        (u'svg', u'urn:oasis:names:tc:opendocument:xmlns:svg-compatible:1.0'),
        (u'fo', u'urn:oasis:names:tc:opendocument:xmlns:xsl-fo-compatible:1.0'),
        (u'draw', u'urn:oasis:names:tc:opendocument:xmlns:draw:1.0'),
        (u'table', u'urn:oasis:names:tc:opendocument:xmlns:table:1.0'),
        (u'form', u'urn:oasis:names:tc:opendocument:xmlns:form:1.0'),
    )
    result = xmlpart
    for prefix, uri in requestedNamespaces:
        # Match the complete prefix followed by "=" so that a longer prefix
        # (e.g. xmlns:formx) does not hide a missing one (xmlns:form); any
        # whitespace, not only a space, may precede the declaration.
        declared = re.search(u'\\sxmlns:' + re.escape(prefix) + u'\\s*=', xmlpart)
        if declared is not None:
            continue
        toInsert = u' xmlns:{prefix}="{uri}"'.format(prefix=prefix, uri=uri)
        anchor = re.search(u'\\sxmlns:', result)
        if anchor is not None:
            pos = anchor.start()
        else:
            # no declaration to hook onto: insert after the name of the
            # first element (the XML prologue, comments and DOCTYPE do not
            # match since "<" must be followed by a name character)
            root = re.search(u'<[A-Za-z_][^\\s/>]*', result)
            if root is None:
                return xmlpart
            pos = root.end()
        result = result[:pos] + toInsert + result[pos:]
    return result
966 
967 
##
#
# detects the mime-type of an ODF file
# Three mechanisms are tried in turn: the content of the 'mimetype'
# member of the archive, then the media-type recorded for the root
# entry "/" in META-INF/manifest.xml, and finally the mime-type of
# text documents as a default.
# @param zipfd an open zipfile.ZipFile instance
# @param odffile this parameter is not used (it is only type-checked)
# @return a mime-type as a unicode string
#
def __detectmimetype(zipfd, odffile):
    assert(isinstance(zipfd, zipfile.ZipFile))
    assert(type(odffile)==type(u"") or 'rb' in repr(odffile) \
               or 'BufferedReader' in repr(odffile) or 'BytesIO' in repr(odffile))

    try:
        return zipfd.read('mimetype').decode("utf-8")
    except Exception:
        # Best effort: a missing or undecodable 'mimetype' member falls
        # through to the next mechanism. Exception (rather than a bare
        # except) lets KeyboardInterrupt and SystemExit propagate.
        pass

    # Fall-through to next mechanism
    manifestpart = zipfd.read('META-INF/manifest.xml')
    manifest = manifestlist(manifestpart)
    rootentry = manifest.get(u"/")
    if rootentry is not None:
        assert(type(rootentry['media-type'])==type(u""))
        return rootentry['media-type']

    # Fall-through to last mechanism
    return u'application/vnd.oasis.opendocument.text'
995 
##
#
# Load an ODF file into memory
# The XML parts, pictures, thumbnail and sub-objects listed in the
# manifest are loaded into the document; every other manifest entry is
# kept as an OpaqueObject in doc._extra. The archive is closed before
# returning, also when loading fails.
# @param odffile unicode string: name of a file, or as an alternative,
# an open readable stream
# @return a reference to the structure (an OpenDocument instance)
#
def load(odffile):
    assert(type(odffile)==type(u"") or 'rb' in repr(odffile) \
               or 'BufferedReader' in repr(odffile) or 'BytesIO' in repr(odffile))

    z = zipfile.ZipFile(odffile)
    try:
        mimetype = __detectmimetype(z, odffile)
        doc = OpenDocument(mimetype, add_generator=False)

        # Look in the manifest file to see which of the four XML files
        # are present
        manifestpart = z.read('META-INF/manifest.xml')
        manifest = manifestlist(manifestpart)
        __loadxmlparts(z, manifest, doc, u'')
        for mentry, mvalue in manifest.items():
            if mentry.startswith(u"Pictures/") and len(mentry) > 9:
                doc.addPicture(mvalue['full-path'], mvalue['media-type'], z.read(mentry))
            elif mentry == u"Thumbnails/thumbnail.png":
                doc.addThumbnail(z.read(mentry))
            elif mentry in (u'settings.xml', u'meta.xml', u'content.xml', u'styles.xml'):
                pass
            # Load subobjects into structure. Only the top-level directory
            # entry "Object <n>/" qualifies; counting the slashes instead of
            # limiting the length also accepts objects numbered 100 and above.
            elif mentry.startswith(u"Object ") and mentry.endswith(u"/") \
                    and mentry.count(u"/") == 1:
                subdoc = OpenDocument(mvalue['media-type'], add_generator=False)
                doc.addObject(subdoc, u"/" + mentry[:-1])
                __loadxmlparts(z, manifest, subdoc, mentry)
            elif mentry.startswith(u"Object "):
                pass # Don't load subobjects as opaque objects
            else:
                if mvalue['full-path'].endswith(u'/'):
                    # directory entry: there is no content to read
                    doc._extra.append(OpaqueObject(mvalue['full-path'], mvalue['media-type'], None))
                else:
                    doc._extra.append(OpaqueObject(mvalue['full-path'], mvalue['media-type'], z.read(mentry)))
                # Add the SUN junk here to the struct somewhere
                # It is cached data, so it can be out-of-date
    finally:
        # release the archive even when reading or parsing raised
        z.close()

    b = doc.getElementsByType(Body)
    # The first matching mime-type prefix selects the attribute of doc which
    # gives direct access to the first child of the body.
    shortcuts = (
        (u'application/vnd.oasis.opendocument.text', 'text'),
        (u'application/vnd.oasis.opendocument.graphics', 'graphics'),
        (u'application/vnd.oasis.opendocument.presentation', 'presentation'),
        (u'application/vnd.oasis.opendocument.spreadsheet', 'spreadsheet'),
        (u'application/vnd.oasis.opendocument.chart', 'chart'),
        (u'application/vnd.oasis.opendocument.image', 'image'),
        (u'application/vnd.oasis.opendocument.formula', 'formula'),
    )
    for mimeprefix, attrname in shortcuts:
        if mimetype.startswith(mimeprefix):
            setattr(doc, attrname, b[0].firstChild)
            break

    return doc
1054 
1055 # vim: set expandtab sw=4 :
def Scripts
Definition: office.py:92
def DocumentStyles
Definition: office.py:62
def __manifestxml
Generates the manifest.xml file; the self.manifest isn't available unless the document is being saved...
just a record to bear a filename, a mediatype and a bytes content
Definition: opendocument.py:98
def build_caches
Builds internal caches; called from element.py.
def Chart
Definition: chart.py:31
def settingsxml
Generates the settings.xml file.
def createElement
Inconvenient interface to create an element, but follows XML-DOM.
def addObject
Adds an object (subdocument).
def contentxml
Generates the content.xml file.
def getMediaType
Returns the media type.
def __init__
the constructor
def Settings
Definition: office.py:95
def FontFaceDecls
Definition: office.py:71
A class to hold the content of an OpenDocument document Use the xml method to write the XML source to...
def OpenDocumentSpreadsheet
Creates a spreadsheet document.
def Presentation
Definition: office.py:86
def DocumentMeta
Definition: office.py:56
def OpenDocumentText
Creates a text document.
def _saveXmlObjects
save xml objects of an opendocument to some folder
def write
User API to write the ODF file to an open file descriptor. Writes the ZIP format.
def __replaceGenerator
Removes a previous 'generator' stance and declares TOOLSVERSION as the new generator.
def Image
Definition: draw.py:125
def OpenDocumentImage
Creates an image document.
def _savePictures
saves pictures contained in an object
def __zipwrite
Write the document to an open file pointer. This is where the real work is done.
def Drawing
Definition: office.py:65
def load
Load an ODF file into memory.
def OpenDocumentDrawing
Creates a drawing document.
def addPictureFromFile
Add a picture It uses the same convention as OOo, in that it saves the picture in the zipfile in the ...
def addPicture
Add a picture It uses the same convention as OOo, in that it saves the picture in the zipfile in the ...
def OpenDocumentTextMaster
Creates a text master document.
def __init__
the constructor
def Spreadsheet
Definition: office.py:98
def xml
Generates the full document as an XML "file".
def __register_stylename
Register a style.
def toXml
converts the document to a valid Xml format.
def DocumentSettings
Definition: office.py:59
def metaxml
Generates the meta.xml file.
def Body
Definition: office.py:38
def save
Save the document under the filename.
def Text
Definition: form.py:104
Creates an arbitrary element and is intended to be subclassed, not used on its own. ...
Definition: element.py:299
def Styles
Definition: office.py:101
def AutomaticStyles
Definition: office.py:32
def getStyleByName
Finds a style object based on the name.
def DocumentContent
Definition: office.py:53
def OpenDocumentPresentation
Creates a presentation document.
def createTextNode
Method to create a text node.
def createCDATASection
Method to create a CDATA section.
def addThumbnail
Add a fixed thumbnail The thumbnail in the library is big, so this is pretty useless.
def stylesxml
Generates the styles.xml file.
def clear_caches
Clears internal caches.
def _parseoneelement
Finds references to style objects in master-styles and add the style name to the style list if not al...
def addPictureFromString
Add a picture from contents given as a Byte string.
def _used_auto_styles
Loop through the masterstyles elements, and find the automatic styles that are used.
def Meta
Definition: office.py:83
def MasterStyles
Definition: office.py:80
def Document
Definition: office.py:50
def OpenDocumentChart
Creates a chart document.
def getElementsByType
Gets elements based on the type, which is a function from text.py, draw.py, etc.