File: HtmlSax.py

package info (click to toggle)
python-xml 0.8.4-10.1%2Blenny1
  • links: PTS
  • area: main
  • in suites: lenny
  • size: 4,972 kB
  • ctags: 10,628
  • sloc: python: 46,730; ansic: 14,354; xml: 968; makefile: 201; sh: 20
file content (94 lines) | stat: -rw-r--r-- 3,123 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
########################################################################
#
# File Name:            HtmlSax.py
#
#
#
"""
Components for reading HTML files from a SAX-like producer.
WWW: http://4suite.com/4DOM         e-mail: support@4suite.com

Copyright (c) 2000 Fourthought Inc, USA.   All Rights Reserved.
See  http://4suite.com/COPYRIGHT  for license and copyright information
"""

import sys, string, cStringIO
import xml.dom.ext
from xml.dom import Node
from xml.dom import implementation


class HtmlDomGenerator:
    """
    Builds a DOM tree from SAX-style document events.

    Call initState() before feeding events.  Each startElement() pushes a
    new element on an internal stack and each endElement() pops it and
    attaches it to its parent.  Character data is buffered and turned into
    a single text node the next time an element boundary is reached or
    getRootNode() is called.
    """

    def __init__(self, keepAllWs=0):
        """
        keepAllWs - when true, ignorable white-space reported inside an
        element is kept as text; when false it is dropped.
        """
        self._keepAllWs = keepAllWs

    def initState(self, ownerDoc=None):
        """
        If None is passed in as the doc, set up an empty document to act
        as owner and also add all elements to this document.  Otherwise
        the given document is used as owner and the generated nodes are
        collected in a new document fragment.
        """
        # Identity test: an ownerDoc object must never be mistaken for
        # "no document" because of how it implements comparison.
        if ownerDoc is None:
            self._ownerDoc = implementation.createHTMLDocument('')
            # Detach and release the document element of the freshly
            # created document so the generated tree can be added directly
            # under the document.
            de = self._ownerDoc.documentElement
            self._ownerDoc.removeChild(de)
            xml.dom.ext.ReleaseNode(de)
            self._rootNode = self._ownerDoc
        else:
            self._ownerDoc = ownerDoc
            #Create a docfrag to hold all the generated nodes.
            self._rootNode = self._ownerDoc.createDocumentFragment()

        #Set up the stack which keeps track of the nesting of DOM nodes.
        self._nodeStack = [self._rootNode]
        self._currText = ''

    def getRootNode(self):
        """
        Flush any buffered text and return the root of the generated tree
        (the document, or the document fragment when an owner document was
        supplied to initState()).
        """
        self._completeTextNode()
        return self._rootNode

    def _completeTextNode(self):
        """
        Turn the buffered character data, if any, into a text node appended
        to the node on top of the stack, then clear the buffer.
        """
        if not self._currText:
            return
        parent = self._nodeStack[-1]
        # A Document node cannot contain text nodes directly, so white-space
        # that arrived via characters() outside the root element is dropped
        # instead of triggering a hierarchy error.  Non-white-space text is
        # still appended so that the DOM reports the problem.
        if parent.nodeType != Node.DOCUMENT_NODE or self._currText.strip():
            parent.appendChild(self._ownerDoc.createTextNode(self._currText))
        self._currText = ''

    #Overridden DocumentHandler methods
    def startElement(self, name, attribs):
        """
        Flush pending text, create an element called 'name' carrying the
        attributes in 'attribs', and push it on the node stack.  The element
        is attached to its parent by the matching endElement() call.
        """
        self._completeTextNode()
        new_element = self._ownerDoc.createElement(name)

        # keys() plus indexing is the only mapping API relied upon here.
        for attrib_name in attribs.keys():
            new_element.setAttribute(attrib_name, attribs[attrib_name])
        self._nodeStack.append(new_element)

    def endElement(self, name):
        """
        Flush pending text, pop the current element off the stack and append
        it to the node that is now on top.  'name' is not checked against
        the element being closed.
        """
        self._completeTextNode()
        finished = self._nodeStack.pop()
        self._nodeStack[-1].appendChild(finished)

    def ignorableWhitespace(self, ch, start, length):
        """
        If 'keepAllWs' permits, add ignorable white-space as a text node.
        Remember that a Document node cannot contain text nodes directly.
        If the white-space occurs outside the root element, there is no place
        for it in the DOM and it must be discarded.
        """
        if not self._keepAllWs:
            return
        if self._nodeStack[-1].nodeType != Node.DOCUMENT_NODE:
            self._currText = self._currText + ch[start:start+length]

    def characters(self, ch, start, length):
        """
        Buffer the slice ch[start:start+length]; the text node itself is
        created later by _completeTextNode().
        """
        self._currText = self._currText + ch[start:start+length]

    #Overridden ErrorHandler methods
    #No warning() handler is defined; the original one is kept disabled:
    #def warning(self, exception):
    #   raise exception

    def error(self, exception):
        """Re-raise the exception passed in."""
        raise exception

    def fatalError(self, exception):
        """Re-raise the exception passed in."""
        raise exception