23 import sys, zipfile, xml.dom.minidom
24 from namespaces
import nsdict
25 from elementtypes
import *
35 ] + [ nsdict[item[0]]+
":"+item[1]
for item
in empty_elements]
37 INLINE_TAGS = [ nsdict[item[0]]+
":"+item[1]
for item
in inline_elements]
57 elif value ==
"normal":
63 elif value ==
"normal":
70 if value
and value !=
"none":
74 if value
and value !=
"none":
78 if value
is None or value ==
'':
80 posisize = value.split(
' ')
82 if textpos.find(
'%') == -1:
86 elif textpos ==
"super":
90 itextpos = int(textpos[:textpos.find(
'%')])
100 return "[italic=%s, bold=i%s, fixed=%s]" % (str(self.
italic),
131 return "[bq=%s, h=%d, code=%s]" % (str(self.
blockquote),
170 for tag
in IGNORED_TAGS:
173 for tag
in INLINE_TAGS:
186 for fontFace
in fontDecl.getElementsByTagName(
"style:font-face"):
187 if fontFace.getAttribute(
"style:font-pitch") ==
"fixed":
188 self.fixedFonts.append(fontFace.getAttribute(
"style:name"))
199 parentProp = self.textStyles.get(parent,
None)
201 textProp = parentProp
203 textPropEl = style.getElementsByTagName(
"style:text-properties")
204 if not textPropEl:
return textProps
206 textPropEl = textPropEl[0]
208 textProps.setItalic(textPropEl.getAttribute(
"fo:font-style"))
209 textProps.setBold(textPropEl.getAttribute(
"fo:font-weight"))
210 textProps.setUnderlined(textPropEl.getAttribute(
"style:text-underline-style"))
211 textProps.setStrikethrough(textPropEl.getAttribute(
"style:text-line-through-style"))
212 textProps.setPosition(textPropEl.getAttribute(
"style:text-position"))
214 if textPropEl.getAttribute(
"style:font-name")
in self.
fixedFonts:
215 textProps.setFixed(
True)
225 name = style.getAttribute(
"style:name")
227 if name.startswith(
"Heading_20_"):
231 paraProps.setHeading(level)
236 paraProps.setTitle(
True)
238 paraPropEl = style.getElementsByTagName(
"style:paragraph-properties")
240 paraPropEl = paraPropEl[0]
241 leftMargin = paraPropEl.getAttribute(
"fo:margin-left")
244 leftMargin = float(leftMargin[:-2])
245 if leftMargin > 0.01:
246 paraProps.setIndented(
True)
252 paraProps.setCode(
True)
262 for style
in styleElements:
264 name = style.getAttribute(
"style:name")
266 if name ==
"Standard":
continue
268 family = style.getAttribute(
"style:family")
269 parent = style.getAttribute(
"style:parent-style-name")
274 elif family ==
"paragraph":
281 for style
in listStyleElements:
282 name = style.getAttribute(
"style:name")
285 if style.hasChildNodes():
286 subitems = [el
for el
in style.childNodes
287 if el.nodeType == xml.dom.Node.ELEMENT_NODE
288 and el.tagName ==
"text:list-level-style-number"]
289 if len(subitems) > 0:
290 prop.setOrdered(
True)
299 zip = zipfile.ZipFile(filepath)
301 styles_doc = xml.dom.minidom.parseString(zip.read(
"styles.xml"))
302 fontfacedecls = styles_doc.getElementsByTagName(
"office:font-face-decls")
305 self.
processStyles(styles_doc.getElementsByTagName(
"style:style"))
308 self.
content = xml.dom.minidom.parseString(zip.read(
"content.xml"))
309 fontfacedecls = self.content.getElementsByTagName(
"office:font-face-decls")
313 self.
processStyles(self.content.getElementsByTagName(
"style:style"))
321 lines = text.split(
"\n")
323 numLines = len(lines)
324 for i
in range(numLines):
326 if (lines[i].strip()
or i == numLines-1
or i == 0
or
327 not ( lines[i-1].startswith(
" ")
328 and lines[i+1].startswith(
" ") ) ):
329 buffer.append(
"\n" + lines[i])
331 return ''.join(buffer)
342 link = node.getAttribute(
"xlink:href")
343 if link
and link[:2] ==
'./':
345 if link
and link[:9] ==
'Pictures/':
347 return "[[Image(%s)]]\n" % link
351 link = node.getAttribute(
"xlink:href")
352 if link.strip() == text.strip():
353 return "[%s] " % link.strip()
355 return "[%s %s] " % (link.strip(), text.strip())
362 cite = (node.getElementsByTagName(
"text:note-citation")[0]
363 .childNodes[0].nodeValue)
364 body = (node.getElementsByTagName(
"text:note-body")[0]
371 num = int(node.getAttribute(
"text:c"))
385 styleName = node.getAttribute(
"text:style-name")
386 style = self.textStyles.get(styleName,
TextProps())
389 return "`" + text +
"`"
399 if style.strikethrough:
401 if style.superscript:
407 return "%s%s%s" % (
''.join(mark), text,
''.join(revmark))
415 styleName = listElement.getAttribute(
"text:style-name")
419 for item
in listElement.childNodes:
420 buffer.append(
" "*indent)
424 number =
" " + number +
". "
425 buffer.append(
" 1. ")
428 subitems = [el
for el
in item.childNodes
429 if el.tagName
in [
"text:p",
"text:h",
"text:list"]]
430 for subitem
in subitems:
431 if subitem.tagName ==
"text:list":
440 return ''.join(buffer)
450 for item
in tableElement.childNodes:
452 if item.tagName ==
"table:table-header-rows":
454 if item.tagName ==
"table:table-row":
455 buffer.append(
"\n||")
456 for cell
in item.childNodes:
460 return ''.join(buffer)
468 body = self.content.getElementsByTagName(
"office:body")[0]
469 text = body.childNodes[0]
473 paragraphs = [el
for el
in text.childNodes
474 if el.tagName
in [
"draw:page",
"text:p",
"text:h",
"text:section",
475 "text:list",
"table:table"]]
477 for paragraph
in paragraphs:
478 if paragraph.tagName ==
"text:list":
480 elif paragraph.tagName ==
"text:section":
482 elif paragraph.tagName ==
"table:table":
491 buffer.append(
"----")
493 buffer.append(
"%s: %s" % (cite, body))
504 for node
in element.childNodes:
506 if node.nodeType == xml.dom.Node.TEXT_NODE:
507 buffer.append(node.nodeValue)
509 elif node.nodeType == xml.dom.Node.ELEMENT_NODE:
512 if tag
in (
"draw:text-box",
"draw:frame"):
515 elif tag
in (
"text:p",
"text:h"):
519 elif tag ==
"text:list":
522 method = self.elements.get(tag)
524 buffer.append(method(node))
526 buffer.append(
" {" + tag +
"} ")
528 return ''.join(buffer)
534 style_name = paragraph.getAttribute(
"text:style-name")
535 paraProps = self.paragraphStyles.get(style_name, dummyParaProps)
538 if paraProps
and not paraProps.code:
541 if paragraph.tagName ==
"text:p" and self.
lastsegment ==
"text:p":
548 return "= " + text +
" =\n"
550 outlinelevel = paragraph.getAttribute(
"text:outline-level")
553 level = int(outlinelevel)
557 return "=" * level +
" " + text +
" " +
"=" * level +
"\n"
560 return "{{{\n" + text +
"\n}}}\n"
562 if paraProps.indented:
563 return self.
wrapParagraph(text, indent = indent, blockquote =
True)
578 return ''.join(buffer) + text
580 for token
in text.split():
582 if counter > LIMIT - indent:
583 buffer.append(
"\n" +
" "*indent)
588 buffer.append(token +
" ")
589 counter += len(token)
591 return ''.join(buffer)
Holds properties for a text style.
Holds properties of a paragraph style.
def compressCodeBlocks
Removes extra blank lines from code blocks.
def tableToString
MoinMoin uses || to delimit table cells.
def extractParagraphProperties
Extracts paragraph properties from a style element.
def extractTextProperties
Extracts text properties from a style element.
def toString
Converts the document to a string.
def processStyles
Runs through "style" elements extracting necessary information.
def processFontDeclarations
Extracts necessary font information from a font-declaration element.
Holds properties for a list style.
def load
Loads an ODT file.