File: drv_sgmlop.py

package info (click to toggle)
qm 1.1.3-1
  • links: PTS
  • area: main
  • in suites: woody
  • size: 8,628 kB
  • ctags: 10,249
  • sloc: python: 41,482; ansic: 20,611; xml: 12,837; sh: 485; makefile: 226
file content (110 lines) | stat: -rw-r--r-- 2,668 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
"""
SAX driver for the sgmlop parser.

$Id: drv_sgmlop.py,v 1.1.1.1 2001/07/17 04:47:38 samuel Exp $
"""

version="0.12"

from xml.parsers import sgmlop
from xml.sax import saxlib,saxutils
from xml.sax import SAXException
import urllib,string

# --- Driver

class Parser(saxlib.Parser):
    """SAX 1.0 driver that feeds documents through an sgmlop XMLParser.

    The driver registers itself with the sgmlop parser as the callback
    object; the ``handle_*`` and ``finish_*`` methods translate the sgmlop
    callbacks into calls on the SAX document handler (``self.doc_handler``).
    """

    def __init__(self):
        saxlib.Parser.__init__(self)
        self.reset()

    def setDocumentHandler(self, dh):
        """Make *dh* the receiver of all document events."""
        # Kept for backward compatibility; reset() already registers the
        # driver.  (Original note: older version wanted ,1 arg.)
        self.parser.register(self)
        self.doc_handler = dh

    def parse(self, url):
        """Open *url* with urllib and parse the complete document."""
        stream = urllib.urlopen(url)
        try:
            self.parseFile(stream)
        finally:
            # Release the connection/file even when parsing fails.
            stream.close()

    def parseFile(self, file):
        """Parse a complete document from the file-like object *file*.

        The object only needs a ``read(size)`` method.  Emits startDocument
        before the first chunk and finishes by calling close(), which emits
        endDocument.
        """
        self._parsing = 1
        self.doc_handler.startDocument()
        parser = self.parser

        # Feed in 16 KiB chunks so large documents are never held in memory
        # as a whole.
        while 1:
            data = file.read(16384)
            if not data:
                break
            parser.feed(data)

        self.close()

    # --- SAX 1.0 METHODS

    def handle_cdata(self, data):
        """sgmlop callback: forward CDATA content as character data."""
        self.doc_handler.characters(data, 0, len(data))

    def handle_data(self, data):
        """sgmlop callback: forward character data found inside the root.

        Whitespace outside the top-level element is ignored; any other text
        outside it is reported to the error handler and not forwarded.
        """
        if self._nesting == 0:
            if data.strip():
                # It's not whitespace?
                self.err_handler.error(SAXException(
                    "characters '%s' outside root element" % data))
            return
        self.doc_handler.characters(data, 0, len(data))

    def handle_proc(self, target, data):
        """sgmlop callback: forward processing instructions."""
        if target == 'xml':
            # Don't report <?xml?> as a processing instruction
            return
        self.doc_handler.processingInstruction(target, data)

    def handle_charref(self, charno):
        """sgmlop callback: forward a numeric character reference.

        NOTE(review): this assumes *charno* is an integer code point --
        confirm against the sgmlop version in use.  References to code
        points >= 256 are silently dropped.
        """
        if charno < 256:
            self.doc_handler.characters(chr(charno), 0, 1)

    def finish_starttag(self, name, attrs):
        """sgmlop callback: report an element start and track nesting."""
        self._nesting = self._nesting + 1
        self.doc_handler.startElement(name, saxutils.AttributeMap(attrs))

    def finish_endtag(self, name):
        """sgmlop callback: report an element end and track nesting."""
        self._nesting = self._nesting - 1
        self.doc_handler.endElement(name)

    # --- EXPERIMENTAL PYTHON SAX EXTENSIONS

    def get_parser_name(self):
        """Return the name of the underlying parser."""
        return "sgmlop"

    def get_parser_version(self):
        """Return the parser version; this driver does not query it."""
        return "Unknown"

    def get_driver_version(self):
        """Return the module-level ``version`` string of this driver."""
        return version

    def is_validating(self):
        """Return 0: this driver reports itself as non-validating."""
        return 0

    def is_dtd_reading(self):
        """Return 0: this driver reports that it does not read the DTD."""
        return 0

    def reset(self):
        """Discard all parse state and start over with a fresh sgmlop parser."""
        self.parser = sgmlop.XMLParser()
        # A new sgmlop parser has no callback object; without registering
        # here the driver would receive no events after reset() until
        # setDocumentHandler() happened to be called again.
        self.parser.register(self)
        self._parsing = 0
        self._nesting = 0

    def feed(self, data):
        """Incrementally parse *data*, emitting startDocument on first use."""
        if not self._parsing:
            self.doc_handler.startDocument()
            self._parsing = 1
        self.parser.feed(data)

    def close(self):
        """Finish the current document and emit endDocument."""
        self.parser.close()
        self.doc_handler.endDocument()
        # The document is over: let a later feed() announce a new one.
        self._parsing = 0
        
# ----

def create_parser():
    """Factory entry point: build and return a new sgmlop driver instance."""
    driver = Parser()
    return driver