doc/html/odf2moinmoin_8py_source.html

 # -*- coding: utf-8 -*-

 # Copyright (C) 2006-2008 Søren Roug, European Environment Agency

 #

 # This library is free software; you can redistribute it and/or

 # modify it under the terms of the GNU Lesser General Public

 # License as published by the Free Software Foundation; either

 # version 2.1 of the License, or (at your option) any later version.

 #

 # This library is distributed in the hope that it will be useful,

 # but WITHOUT ANY WARRANTY; without even the implied warranty of

 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU

 # Lesser General Public License for more details.

 #

 # You should have received a copy of the GNU Lesser General Public

 # License along with this library; if not, write to the Free Software

 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA

 #

 # See http://trac.edgewall.org/wiki/WikiFormatting

 #

 # Contributor(s):

 #


 import sys, zipfile, xml.dom.minidom

 from namespaces import nsdict

 from elementtypes import *


 # Tags whose whole subtree is dropped from the output (ODF2MoinMoin maps each
 # of them to do_nothing): a fixed list of drawing/annotation elements plus
 # every (namespace, local-name) pair in empty_elements, qualified with the
 # prefix nsdict gives for that namespace.
 # NOTE: 'draw:a' used to lack its trailing comma, which silently fused it
 # with the next literal into the single bogus tag 'draw:adraw:g'.
 IGNORED_TAGS = [
     'draw:a',
     'draw:g',
     'draw:line',
     'draw:object-ole',
     'office:annotation',
     'presentation:notes',
     'svg:desc',
 ] + [nsdict[item[0]] + ":" + item[1] for item in empty_elements]

 # Tags rendered through ODF2MoinMoin.inline_markup: every pair listed in
 # inline_elements, qualified the same way as above.
 INLINE_TAGS = [nsdict[item[0]] + ":" + item[1] for item in inline_elements]


 class TextProps:
     """Holds properties for a text style.

     Each attribute is a flag describing one aspect of character formatting;
     all of them start out as False.
     """

     def __init__(self):
         self.italic = False
         self.bold = False
         self.fixed = False
         self.underlined = False
         self.strikethrough = False
         self.superscript = False
         self.subscript = False

     def setItalic(self, value):
         """Update the italic flag from a font-style value.

         "italic" switches the flag on, "normal" switches it off; any other
         value (including an empty string) leaves it unchanged.
         """
         if value == "italic":
             self.italic = True
         elif value == "normal":
             self.italic = False

     def setBold(self, value):
         """Update the bold flag from a font-weight value.

         "bold" switches the flag on, "normal" switches it off; any other
         value (including an empty string) leaves it unchanged.
         """
         if value == "bold":
             self.bold = True
         elif value == "normal":
             self.bold = False

     def setFixed(self, value):
         """Store value as the fixed-pitch (monospace) flag."""
         self.fixed = value

     def setUnderlined(self, value):
         """Update the underline flag from an underline-style value.

         An empty/missing value leaves the flag unchanged, "none" clears it
         (mirroring how "normal" clears italic/bold) and any other value
         sets it.
         """
         if value:
             self.underlined = value != "none"

     def setStrikethrough(self, value):
         """Update the strikethrough flag from a line-through-style value.

         An empty/missing value leaves the flag unchanged, "none" clears it
         and any other value sets it.
         """
         if value:
             self.strikethrough = value != "none"

     def setPosition(self, value):
         """Update the super-/subscript flags from a text-position value.

         Only the first space-separated token of value is inspected (a second
         token, if any, is ignored).  That token is either the keyword "sub" /
         "super" or a percentage such as "33%" or "-33.3%".  Following the
         OpenDocument definition of style:text-position, a positive
         percentage raises the text (superscript) and a negative one lowers
         it (subscript); offsets within +/-10% are ignored.  None, an empty
         string, or an unparsable token leave both flags unchanged.
         """
         if value is None or value == '':
             return
         textpos = value.split(' ')[0]

         percentAt = textpos.find('%')
         if percentAt == -1:
             if textpos == "sub":
                 self.superscript = False
                 self.subscript = True
             elif textpos == "super":
                 self.superscript = True
                 self.subscript = False
             return

         try:
             # float() rather than int(): fractional percentages are legal.
             offset = float(textpos[:percentAt])
         except ValueError:
             return

         if offset > 10:
             self.superscript = True
             self.subscript = False
         elif offset < -10:
             self.superscript = False
             self.subscript = True

     def __str__(self):
         """Return a short debugging summary of italic, bold and fixed."""
         return "[italic=%s, bold=%s, fixed=%s]" % (str(self.italic),
                                                    str(self.bold),
                                                    str(self.fixed))


 class ParagraphProps:
     """Holds properties of a paragraph style.

     The attributes record whether the paragraph is a blockquote, its heading
     level (0 by default), whether it is a code block, whether it is the
     document title, and whether it is indented.  No setter is provided for
     blockquote; it keeps its initial False unless assigned directly.
     """

     def __init__(self):
         self.blockquote = False
         self.headingLevel = 0
         self.code = False
         self.title = False
         self.indented = 0

     def setIndented(self, value):
         """Record value as the indentation flag."""
         self.indented = value

     def setHeading(self, level):
         """Record level as the heading level."""
         self.headingLevel = level

     def setTitle(self, value):
         """Record value as the title flag."""
         self.title = value

     def setCode(self, value):
         """Record value as the code-block flag."""
         self.code = value

     def __str__(self):
         """Return a short debugging summary (blockquote, heading, code)."""
         summary = (str(self.blockquote), self.headingLevel, str(self.code))
         return "[bq=%s, h=%d, code=%s]" % summary


 class ListProperties:
     """Holds properties for a list style.

     The only property tracked is whether the list is ordered (numbered);
     a new instance is unordered.
     """

     def __init__(self):
         self.ordered = False

     def setOrdered(self, value):
         """Record value as the ordered-list flag."""
         self.ordered = value


 class ODF2MoinMoin(object):


     def __init__(self, filepath):
         """Set up the conversion state and load the document at filepath.

         Initialises the footnote list, the style tables (each text and
         paragraph table starts with a default "Standard" entry), and the
         tag -> handler dispatch table used by textToString, then hands
         filepath to load().
         """
         # Per-conversion state.
         self.footnotes = []
         self.footnoteCounter = 0
         self.hasTitle = 0
         self.lastsegment = None

         # Style tables, filled in by load().
         self.textStyles = {"Standard": TextProps()}
         self.paragraphStyles = {"Standard": ParagraphProps()}
         self.listStyles = {}
         self.fixedFonts = []

         # Tags: explicit handlers first ...
         handlers = {}
         for tag in ('draw:page', 'draw:frame', 'draw:text-box'):
             handlers[tag] = self.textToString
         handlers['draw:image'] = self.draw_image
         handlers['text:a'] = self.text_a
         handlers['text:note'] = self.text_note

         # ... then the bulk registrations.  Later assignments win, so the
         # order below (ignored, inline, specific overrides) matters.
         handlers.update(dict.fromkeys(IGNORED_TAGS, self.do_nothing))
         handlers.update(dict.fromkeys(INLINE_TAGS, self.inline_markup))
         handlers.update({
             'text:line-break': self.text_line_break,
             'text:s': self.text_s,
             'text:tab': self.text_tab,
         })
         self.elements = handlers

         self.load(filepath)


     ##

     #  Extracts necessary font information from a font-declaration

     #             element.

     #

     def processFontDeclarations(self, fontDecl):

         for fontFace in fontDecl.getElementsByTagName("style:font-face"):

             if fontFace.getAttribute("style:font-pitch") == "fixed":

                 self.fixedFonts.append(fontFace.getAttribute("style:name"))


     def extractTextProperties(self, style, parent=None):
         """Extract text properties from a style element.

         @param style   DOM element whose first style:text-properties
                        descendant (if any) supplies the formatting.
         @param parent  optional name of the parent style; when it is already
                        present in self.textStyles its properties are used as
                        the starting point.
         @return a new TextProps instance.

         The parent's properties are copied, never shared.  The old code
         assigned them to a misspelled variable, so nothing was inherited;
         assigning the parent object itself would have let the child's
         setters modify the parent style too.
         """
         textProps = TextProps()

         if parent:
             parentProp = self.textStyles.get(parent, None)
             if parentProp:
                 textProps.__dict__.update(parentProp.__dict__)

         textPropEls = style.getElementsByTagName("style:text-properties")
         if not textPropEls:
             return textProps
         textPropEl = textPropEls[0]

         textProps.setItalic(textPropEl.getAttribute("fo:font-style"))
         textProps.setBold(textPropEl.getAttribute("fo:font-weight"))
         textProps.setUnderlined(
             textPropEl.getAttribute("style:text-underline-style"))
         textProps.setStrikethrough(
             textPropEl.getAttribute("style:text-line-through-style"))
         textProps.setPosition(textPropEl.getAttribute("style:text-position"))

         # Only an explicitly named font decides fixed-pitch; a style with no
         # font-name keeps the inherited (or default False) value.
         fontName = textPropEl.getAttribute("style:font-name")
         if fontName:
             textProps.setFixed(fontName in self.fixedFonts)

         return textProps


     def extractParagraphProperties(self, style, parent=None):
         """Extract paragraph properties from a style element.

         @param style   DOM element describing the paragraph style.
         @param parent  name of the parent style; accepted for symmetry with
                        extractTextProperties but not used here.
         @return a new ParagraphProps instance.

         Detection rules:
           - a style:name of the form "Heading_20_<n>" with an integer <n>
             sets the heading level to <n>;
           - a style:name of exactly "Title" marks the title;
           - an fo:margin-left greater than 0.01 (after stripping the last
             two characters, assumed to be the unit) marks the paragraph
             as indented;
           - a fixed-pitch font in the style's own text properties marks the
             paragraph as code.
         """
         paraProps = ParagraphProps()

         name = style.getAttribute("style:name")

         headingPrefix = "Heading_20_"
         if name.startswith(headingPrefix):
             try:
                 paraProps.setHeading(int(name[len(headingPrefix):]))
             except ValueError:
                 # Non-numeric suffix: not a numbered heading style.
                 pass

         if name == "Title":
             paraProps.setTitle(True)

         paraPropEls = style.getElementsByTagName("style:paragraph-properties")
         if paraPropEls:
             leftMargin = paraPropEls[0].getAttribute("fo:margin-left")
             if leftMargin:
                 try:
                     indented = float(leftMargin[:-2]) > 0.01
                 except ValueError:
                     # Margin not of the "<number><2-char unit>" form; ignore.
                     indented = False
                 if indented:
                     paraProps.setIndented(True)

         textProps = self.extractTextProperties(style)
         if textProps.fixed:
             paraProps.setCode(True)

         return paraProps


     ##

     #  Runs through "style" elements extracting necessary information.

     #

     def processStyles(self, styleElements):


         for style in styleElements:


             name = style.getAttribute("style:name")


             if name == "Standard": continue


             family = style.getAttribute("style:family")

             parent = style.getAttribute("style:parent-style-name")


             if family == "text":

                 self.textStyles[name] = self.extractTextProperties(style, parent)


             elif family == "paragraph":

                 self.paragraphStyles[name] = \

                                  self.extractParagraphProperties(style, parent)

                 self.textStyles[name] = self.extractTextProperties(style, parent)


     def processListStyles(self, listStyleElements):


         for style in listStyleElements:

             name = style.getAttribute("style:name")


             prop = ListProperties()

             if style.hasChildNodes():

                 subitems = [el for el in style.childNodes

                      if el.nodeType == xml.dom.Node.ELEMENT_NODE

                      and el.tagName == "text:list-level-style-number"]

                 if len(subitems) > 0:

                     prop.setOrdered(True)


             self.listStyles[name] = prop


     ##

     #  Loads an ODT file.

     def load(self, filepath):


         zip = zipfile.ZipFile(filepath)


         styles_doc = xml.dom.minidom.parseString(zip.read("styles.xml"))

         fontfacedecls = styles_doc.getElementsByTagName("office:font-face-decls")

         if fontfacedecls:

             self.processFontDeclarations(fontfacedecls[0])

         self.processStyles(styles_doc.getElementsByTagName("style:style"))

         self.processListStyles(styles_doc.getElementsByTagName("text:list-style"))


         self.content = xml.dom.minidom.parseString(zip.read("content.xml"))

         fontfacedecls = self.content.getElementsByTagName("office:font-face-decls")

         if fontfacedecls:

             self.processFontDeclarations(fontfacedecls[0])


         self.processStyles(self.content.getElementsByTagName("style:style"))

         self.processListStyles(self.content.getElementsByTagName("text:list-style"))


     ##

     #  Removes extra blank lines from code blocks.

     def compressCodeBlocks(self, text):


         return text

         lines = text.split("\n")

         buffer = []

         numLines = len(lines)

         for i in range(numLines):


             if (lines[i].strip() or i == numLines-1  or i == 0 or

                 not ( lines[i-1].startswith("    ")

                       and lines[i+1].startswith("    ") ) ):

                 buffer.append("\n" + lines[i])


         return ''.join(buffer)


 #-----------------------------------

     def do_nothing(self, node):

         return ''


     ##

     #

     #

     def draw_image(self, node):


         link = node.getAttribute("xlink:href")

         if link and link[:2] == './': # Indicates a sub-object, which isn't supported

             return "%s\n" % link

         if link and link[:9] == 'Pictures/':

             link = link[9:]

         return "[[Image(%s)]]\n" % link


     def text_a(self, node):

         text = self.textToString(node)

         link = node.getAttribute("xlink:href")

         if link.strip() == text.strip():

             return "[%s] " % link.strip()

         else:

             return "[%s %s] " % (link.strip(), text.strip())


     def text_line_break(self, node):

         return "[[BR]]"


     def text_note(self, node):

         cite = (node.getElementsByTagName("text:note-citation")[0]

                     .childNodes[0].nodeValue)

         body = (node.getElementsByTagName("text:note-body")[0]

                     .childNodes[0])

         self.footnotes.append((cite, self.textToString(body)))

         return "^%s^" % cite


     def text_s(self, node):

         try:

             num = int(node.getAttribute("text:c"))

             return " "*num

         except:

             return " "


     def text_tab(self, node):

         return "    "


     def inline_markup(self, node):

         text = self.textToString(node)


         if not text.strip():

             return ''  # don't apply styles to white space


         styleName = node.getAttribute("text:style-name")

         style = self.textStyles.get(styleName, TextProps())


         if style.fixed:

             return "`" + text + "`"


         mark = []

         if style:

             if style.italic:

                 mark.append("''")

             if style.bold:

                 mark.append("'''")

             if style.underlined:

                 mark.append("__")

             if style.strikethrough:

                 mark.append("~~")

             if style.superscript:

                 mark.append("^")

             if style.subscript:

                 mark.append(",,")

         revmark = mark[:]

         revmark.reverse()

         return "%s%s%s" % (''.join(mark), text, ''.join(revmark))


 #-----------------------------------

     def listToString(self, listElement, indent = 0):


         self.lastsegment = listElement.tagName

         buffer = []


         styleName = listElement.getAttribute("text:style-name")

         props = self.listStyles.get(styleName, ListProperties())


         i = 0

         for item in listElement.childNodes:

             buffer.append(" "*indent)

             i += 1

             if props.ordered:

                 number = str(i)

                 number = " " + number + ". "

                 buffer.append(" 1. ")

             else:

                 buffer.append(" * ")

             subitems = [el for el in item.childNodes

                           if el.tagName in ["text:p", "text:h", "text:list"]]

             for subitem in subitems:

                 if subitem.tagName == "text:list":

                     buffer.append("\n")

                     buffer.append(self.listToString(subitem, indent+3))

                 else:

                     buffer.append(self.paragraphToString(subitem, indent+3))

                 self.lastsegment = subitem.tagName

             self.lastsegment = item.tagName

             buffer.append("\n")


         return ''.join(buffer)


     ##

     #  MoinMoin uses || to delimit table cells

     #

     def tableToString(self, tableElement):


         self.lastsegment = tableElement.tagName

         buffer = []


         for item in tableElement.childNodes:

             self.lastsegment = item.tagName

             if item.tagName == "table:table-header-rows":

                 buffer.append(self.tableToString(item))

             if item.tagName == "table:table-row":

                 buffer.append("\n||")

                 for cell in item.childNodes:

                     buffer.append(self.inline_markup(cell))

                     buffer.append("||")

                     self.lastsegment = cell.tagName

         return ''.join(buffer)


     ##

     #  Converts the document to a string.

     #             FIXME: Result from second call differs from first call

     #

     def toString(self):

         body = self.content.getElementsByTagName("office:body")[0]

         text = body.childNodes[0]


         buffer = []


         paragraphs = [el for el in text.childNodes

                       if el.tagName in ["draw:page", "text:p", "text:h","text:section",

                                         "text:list", "table:table"]]


         for paragraph in paragraphs:

             if paragraph.tagName == "text:list":

                 text = self.listToString(paragraph)

             elif paragraph.tagName == "text:section":

                 text = self.textToString(paragraph)

             elif paragraph.tagName == "table:table":

                 text = self.tableToString(paragraph)

             else:

                 text = self.paragraphToString(paragraph)

             if text:

                 buffer.append(text)


         if self.footnotes:


             buffer.append("----")

             for cite, body in self.footnotes:

                 buffer.append("%s: %s" % (cite, body))


         buffer.append("")

         return self.compressCodeBlocks('\n'.join(buffer))


     def textToString(self, element):


         buffer = []


         for node in element.childNodes:


             if node.nodeType == xml.dom.Node.TEXT_NODE:

                 buffer.append(node.nodeValue)


             elif node.nodeType == xml.dom.Node.ELEMENT_NODE:

                 tag = node.tagName


                 if tag in ("draw:text-box", "draw:frame"):

                     buffer.append(self.textToString(node))


                 elif tag in ("text:p", "text:h"):

                     text = self.paragraphToString(node)

                     if text:

                         buffer.append(text)

                 elif tag == "text:list":

                     buffer.append(self.listToString(node))

                 else:

                     method = self.elements.get(tag)

                     if method:

                         buffer.append(method(node))

                     else:

                         buffer.append(" {" + tag + "} ")


         return ''.join(buffer)


     def paragraphToString(self, paragraph, indent = 0):
         """Render a paragraph or heading element as wiki text.

         @param paragraph  the paragraph/heading DOM element.
         @param indent     indentation passed through to wrapParagraph.
         @return the rendered text.

         The paragraph style is looked up in self.paragraphStyles by
         text:style-name (a default ParagraphProps is used when unknown).
         Non-code text is stripped; a text:p directly following another
         text:p is prefixed with a newline.  Then, in order:
           - a title paragraph sets self.hasTitle and becomes "= text =";
           - an element with text:outline-level becomes a heading of that
             level (one deeper once a title has been seen) if the level is
             at least 1;
           - a code paragraph without outline level is wrapped in {{{ }}};
           - anything else goes through wrapParagraph, as a blockquote when
             the style is indented.
         """
         tag = paragraph.tagName
         styleName = paragraph.getAttribute("text:style-name")
         props = self.paragraphStyles.get(styleName, ParagraphProps())

         # inline_markup may recurse and update self.lastsegment, so the
         # comparison below has to come after it.
         text = self.inline_markup(paragraph)
         if not props.code:
             text = text.strip()
         if tag == "text:p" and self.lastsegment == "text:p":
             text = "\n" + text
         self.lastsegment = tag

         if props.title:
             self.hasTitle = 1
             return "= " + text + " =\n"

         outlinelevel = paragraph.getAttribute("text:outline-level")
         if not outlinelevel:
             if props.code:
                 return "{{{\n" + text + "\n}}}\n"
         else:
             level = int(outlinelevel)
             if self.hasTitle:
                 level = level + 1
             if level >= 1:
                 marker = "=" * level
                 return marker + " " + text + " " + marker + "\n"

         asQuote = bool(props.indented)
         return self.wrapParagraph(text, indent = indent, blockquote = asQuote)

     def wrapParagraph(self, text, indent = 0, blockquote=False):


         counter = 0

         buffer = []

         LIMIT = 50


         if blockquote:

             buffer.append("  ")


         return ''.join(buffer) + text

         # Unused from here

         for token in text.split():


             if counter > LIMIT - indent:

                 buffer.append("\n" + " "*indent)

                 if blockquote:

                     buffer.append("  ")

                 counter = 0


             buffer.append(token + " ")

             counter += len(token)


         return ''.join(buffer)

odf.odf2moinmoin.ODF2MoinMoin.text_s
def text_s
Definition: odf2moinmoin.py:369

odf.odf2moinmoin.ParagraphProps.blockquote
blockquote
Definition: odf2moinmoin.py:110

odf.odf2moinmoin.TextProps
Holds properties for a text style.
Definition: odf2moinmoin.py:42

odf.odf2moinmoin.ODF2MoinMoin.footnoteCounter
footnoteCounter
Definition: odf2moinmoin.py:153

odf.odf2moinmoin.TextProps.setFixed
def setFixed
Definition: odf2moinmoin.py:66

odf.odf2moinmoin.TextProps.setStrikethrough
def setStrikethrough
Definition: odf2moinmoin.py:73

odf.odf2moinmoin.TextProps.setUnderlined
def setUnderlined
Definition: odf2moinmoin.py:69

odf.odf2moinmoin.ODF2MoinMoin.text_tab
def text_tab
Definition: odf2moinmoin.py:376

odf.odf2moinmoin.ODF2MoinMoin.hasTitle
hasTitle
Definition: odf2moinmoin.py:158

odf.odf2moinmoin.ParagraphProps.__init__
def __init__
Definition: odf2moinmoin.py:108

odf.odf2moinmoin.ParagraphProps.setHeading
def setHeading
Definition: odf2moinmoin.py:119

odf.odf2moinmoin.TextProps.setBold
def setBold
Definition: odf2moinmoin.py:60

odf.odf2moinmoin.ListProperties.ordered
ordered
Definition: odf2moinmoin.py:141

odf.odf2moinmoin.TextProps.__str__
def __str__
Definition: odf2moinmoin.py:98

odf.odf2moinmoin.ODF2MoinMoin.processListStyles
def processListStyles
Definition: odf2moinmoin.py:279

odf.odf2moinmoin.ODF2MoinMoin.draw_image
def draw_image
Definition: odf2moinmoin.py:340

odf.odf2moinmoin.ODF2MoinMoin.__init__
def __init__
Definition: odf2moinmoin.py:151

odf.odf2moinmoin.ParagraphProps
Holds properties of a paragraph style.
Definition: odf2moinmoin.py:106

odf.odf2moinmoin.ParagraphProps.setCode
def setCode
Definition: odf2moinmoin.py:125

odf.odf2moinmoin.ODF2MoinMoin.compressCodeBlocks
def compressCodeBlocks
Removes extra blank lines from code blocks.
Definition: odf2moinmoin.py:318

odf.odf2moinmoin.TextProps.subscript
subscript
Definition: odf2moinmoin.py:52

odf.odf2moinmoin.TextProps.underlined
underlined
Definition: odf2moinmoin.py:49

odf.odf2moinmoin.ParagraphProps.title
title
Definition: odf2moinmoin.py:113

odf.odf2moinmoin.ODF2MoinMoin.elements
elements
Definition: odf2moinmoin.py:162

odf.odf2moinmoin.ODF2MoinMoin.inline_markup
def inline_markup
Definition: odf2moinmoin.py:379

odf.odf2moinmoin.ParagraphProps.__str__
def __str__
Definition: odf2moinmoin.py:129

odf.odf2moinmoin.ODF2MoinMoin.wrapParagraph
def wrapParagraph
Definition: odf2moinmoin.py:569

odf.odf2moinmoin.ParagraphProps.indented
indented
Definition: odf2moinmoin.py:114

odf.odf2moinmoin.TextProps.strikethrough
strikethrough
Definition: odf2moinmoin.py:50

odf.odf2moinmoin.TextProps.superscript
superscript
Definition: odf2moinmoin.py:51

odf.odf2moinmoin.ODF2MoinMoin.tableToString
def tableToString
MoinMoin uses || to delimit table cells.
Definition: odf2moinmoin.py:445

odf.odf2moinmoin.ODF2MoinMoin.extractParagraphProperties
def extractParagraphProperties
Extracts paragraph properties from a style element.
Definition: odf2moinmoin.py:221

odf.odf2moinmoin.ParagraphProps.headingLevel
headingLevel
Definition: odf2moinmoin.py:111

odf.odf2moinmoin.TextProps.italic
italic
Definition: odf2moinmoin.py:46

odf.odf2moinmoin.ODF2MoinMoin.textStyles
textStyles
Definition: odf2moinmoin.py:154

odf.odf2moinmoin.ODF2MoinMoin.paragraphStyles
paragraphStyles
Definition: odf2moinmoin.py:155

odf.odf2moinmoin.ODF2MoinMoin.text_note
def text_note
Definition: odf2moinmoin.py:361

odf.odf2moinmoin.ODF2MoinMoin.extractTextProperties
def extractTextProperties
Extracts text properties from a style element.
Definition: odf2moinmoin.py:194

object

odf.odf2moinmoin.ODF2MoinMoin.text_line_break
def text_line_break
Definition: odf2moinmoin.py:358

odf.odf2moinmoin.TextProps.fixed
fixed
Definition: odf2moinmoin.py:48

odf.odf2moinmoin.ParagraphProps.code
code
Definition: odf2moinmoin.py:112

odf.odf2moinmoin.TextProps.__init__
def __init__
Definition: odf2moinmoin.py:44

odf.odf2moinmoin.ListProperties.setOrdered
def setOrdered
Definition: odf2moinmoin.py:143

odf.odf2moinmoin.ODF2MoinMoin.footnotes
footnotes
Definition: odf2moinmoin.py:152

odf.odf2moinmoin.ODF2MoinMoin.listStyles
listStyles
Definition: odf2moinmoin.py:156

odf.odf2moinmoin.ParagraphProps.setIndented
def setIndented
Definition: odf2moinmoin.py:116

odf.odf2moinmoin.ODF2MoinMoin.toString
def toString
Converts the document to a string.
Definition: odf2moinmoin.py:467

odf.odf2moinmoin.TextProps.setPosition
def setPosition
Definition: odf2moinmoin.py:77

odf.odf2moinmoin.ODF2MoinMoin.textToString
def textToString
Definition: odf2moinmoin.py:500

odf.odf2moinmoin.ODF2MoinMoin.listToString
def listToString
Definition: odf2moinmoin.py:410

odf.odf2moinmoin.ODF2MoinMoin.do_nothing
def do_nothing
Definition: odf2moinmoin.py:334

odf.odf2moinmoin.ODF2MoinMoin
Definition: odf2moinmoin.py:148

odf.odf2moinmoin.ODF2MoinMoin.processStyles
def processStyles
Runs through "style" elements extracting necessary information.
Definition: odf2moinmoin.py:260

odf.odf2moinmoin.ODF2MoinMoin.lastsegment
lastsegment
Definition: odf2moinmoin.py:159

odf.odf2moinmoin.TextProps.setItalic
def setItalic
Definition: odf2moinmoin.py:54

odf.odf2moinmoin.ODF2MoinMoin.fixedFonts
fixedFonts
Definition: odf2moinmoin.py:157

odf.odf2moinmoin.ODF2MoinMoin.paragraphToString
def paragraphToString
Definition: odf2moinmoin.py:530

odf.odf2moinmoin.TextProps.bold
bold
Definition: odf2moinmoin.py:47

odf.odf2moinmoin.ODF2MoinMoin.processFontDeclarations
def processFontDeclarations
Extracts necessary font information from a font-declaration element.
Definition: odf2moinmoin.py:185

odf.odf2moinmoin.ListProperties.__init__
def __init__
Definition: odf2moinmoin.py:140

odf.odf2moinmoin.ListProperties
Holds properties for a list style.
Definition: odf2moinmoin.py:138

odf.odf2moinmoin.ParagraphProps.setTitle
def setTitle
Definition: odf2moinmoin.py:122

odf.odf2moinmoin.ODF2MoinMoin.load
def load
Loads an ODT file.
Definition: odf2moinmoin.py:297

odf.odf2moinmoin.ODF2MoinMoin.text_a
def text_a
Definition: odf2moinmoin.py:349

odf.odf2moinmoin.ODF2MoinMoin.content
content
Definition: odf2moinmoin.py:308