"""A module of experimental extensions to the standard SAX interface."""
import saxlib,imp,sys,string
# --- Internal utility methods
def rec_find_module(module):
    """Find a (possibly dotted) module name, loading each component in turn.

    imp.find_module only handles a single name, so the dotted name is
    walked one component at a time.  Every component is loaded with
    imp.load_module; when the loaded module has a __path__, its first
    entry becomes the directory searched for the next component.

    Returns the tuple for the last component: the component name followed
    by the values imp.find_module returned for it, i.e. the argument list
    for imp.load_module.  Exceptions raised by imp.find_module and
    imp.load_module are not caught here.
    """
    search_dir = ""
    for component in module.split("."):
        if search_dir == "":
            found = imp.find_module(component)
        else:
            found = imp.find_module(component, [search_dir])
        info = (component,) + found

        loaded = imp.load_module(*info)

        # Modules without a __path__ leave the search directory unchanged.
        try:
            search_dir = loaded.__path__[0]
        except AttributeError:
            pass

    return info
# --- Parser factory
class ParserFactory:
    """A general class to be used by applications for creating parsers on
    foreign systems where it is unknown which parsers exist.

    The factory holds a list of driver names.  make_parser tries them in
    order and returns a driver for the first one that can be imported.
    """

    def __init__(self, list=None):
        """Store the driver names to try.

        list -- sequence of driver names, given without the
                'xml.sax.drivers.drv_' prefix; may be None.
        """
        # 'list' shadows the builtin, but the name is part of the keyword
        # interface and is therefore kept.
        self.parsers = list

    def get_parser_list(self):
        "Returns the list of possible drivers."
        return self.parsers

    def set_parser_list(self, list):
        "Sets the driver list."
        self.parsers = list

    def make_parser(self, drv_name=None):
        """Returns a SAX driver for the first available parser of the parsers
        in the list. Note that the list is one of drivers, so it first tries
        the driver and if that exists imports it to see if the parser also
        exists. If no parsers are available a SAXException is thrown.

        Accepts the driver package name as an optional argument; when it is
        given, only that driver is tried and the stored list is ignored.
        """
        if drv_name is None:
            candidates = self.parsers
            # A factory created without a list stores None.  Treat that as
            # "no drivers" so the documented SAXException is raised below
            # instead of a TypeError from iterating over None.
            if candidates is None:
                candidates = []
        else:
            candidates = [drv_name]

        for name in candidates:
            module_name = 'xml.sax.drivers.drv_' + name
            try:
                info = rec_find_module(module_name)
                try:
                    drv_module = imp.load_module(*info)
                finally:
                    # info is (name, file, pathname, description).  The imp
                    # module leaves closing the file to the caller, and it
                    # is None when there is no file to close.
                    if info[1] is not None:
                        info[1].close()
                return drv_module.create_parser()
            except ImportError:
                # Driver or underlying parser is missing: try the next one.
                pass

        raise saxlib.SAXException("No parsers found", None)
# --- Experimental extension to Parser interface
class ExtendedParser(saxlib.Parser):
    """Experimental unofficial SAX level 2 extended parser interface.

    Every method defined here raises SAXException("Method not supported.");
    the intent is for subclasses to override the ones they support.
    """

    def get_parser_name(self):
        """Return the name of the parser as a single word."""
        raise saxlib.SAXException("Method not supported.", None)

    def get_parser_version(self):
        """Return the version of the parser that was actually imported.

        This may differ from the version the driver was implemented for.
        """
        raise saxlib.SAXException("Method not supported.", None)

    def get_driver_version(self):
        """Return the driver's own version number."""
        raise saxlib.SAXException("Method not supported.", None)

    def is_validating(self):
        """Return true for a validating parser, false otherwise."""
        raise saxlib.SAXException("Method not supported.", None)

    def is_dtd_reading(self):
        """Return true for a non-validating parser that still reads the DTD,
        as the spec requires."""
        raise saxlib.SAXException("Method not supported.", None)

    def reset(self):
        """Make the parser start parsing afresh."""
        raise saxlib.SAXException("Method not supported.", None)

    def feed(self, data):
        """Feed data to the parser."""
        raise saxlib.SAXException("Method not supported.", None)

    def close(self):
        """Signal the end of input; called after the last call to feed."""
        raise saxlib.SAXException("Method not supported.", None)
# --- Experimental document handler which does not slice strings
class NosliceDocumentHandler(saxlib.DocumentHandler):
    """A document handler that does not force the client application to
    slice character data strings.

    Subclasses override handle_data, which receives the character data as
    a single string.  On construction the 'characters' event is bound to
    safe_handler; the attribute can be rebound to slice_handler or
    noslice_handler instead.
    """

    def __init__(self):
        # The base initialiser used to be called without an instance
        # (DocumentHandler.__init__()), which fails on every construction.
        # saxlib is not visible from here, so the lookup is guarded in case
        # the base class defines no __init__ at all -- TODO confirm.
        base_init = getattr(saxlib.DocumentHandler, "__init__", None)
        if base_init is not None:
            base_init(self)
        self.characters = self.safe_handler

    def safe_handler(self, data, start, length):
        """A characters event handler that always works, but doesn't always
        slice strings."""
        if start == 0 and length == len(data):
            # The event covers the whole string, so no slice is needed.
            self.handle_data(data)
        else:
            self.handle_data(data[start:start + length])

    def slice_handler(self, data, start, length):
        "A character event handler that always slices strings."
        self.handle_data(data[start:start + length])

    def noslice_handler(self, data, start, length):
        """A character event handler that never slices strings.

        start and length are ignored; data is passed on unchanged.
        """
        self.handle_data(data)

    def handle_data(self, data):
        "This is the character data event method to override."
        pass
# --- Creating parser factories
# Ready-made factories.  Each list names drivers in the order in which
# ParserFactory.make_parser tries them.
XMLParserFactory = ParserFactory([
    "pyexpat",
    "xmltok",
    "xmlproc",
    "xmltoolkit",
    "xmllib",
    "xmldc",
    "sgmlop",
])

XMLValParserFactory = ParserFactory(["xmlproc_val"])

HTMLParserFactory = ParserFactory(["htmllib", "sgmlop", "sgmllib"])

SGMLParserFactory = ParserFactory(["sgmlop", "sgmllib"])
def make_parser(parser=None):
    """Return a SAX driver created by XMLParserFactory.

    parser -- optional driver name.  When it is None the factory's own
              driver list is used.
    """
    # ParserFactory.make_parser treats None the same as an omitted
    # argument, so the value can be passed straight through.
    return XMLParserFactory.make_parser(parser)
|