"""
Common code for the sgmllib, htmllib and xmllib parser drivers.
"""

from xml.sax import saxlib,saxutils

import urllib

# --- LibParser

class LibParser(saxlib.Parser,saxlib.Locator):
    "Common code for the sgmllib, htmllib and xmllib parser drivers."

    def __init__(self):
        saxlib.Parser.__init__(self)
        
    def parse(self,sysID):
        "Parses the referenced document."
        self.sysID=sysID
        self.parseFile(urllib.urlopen(sysID))
        
    def parseFile(self,fileobj):
        "Parses the given file."
        if self._can_locate():
            self.doc_handler.setDocumentLocator(self)
        self.reset()
        while 1:
            buf=fileobj.read(16384)
            if buf=="": break

            try:
                self.feed(buf)
            except RuntimeError,e:
                self.err_handler.fatalError(saxlib.SAXException(str(e),e))
            
        self.close()

    def unknown_endtag(self,tag):
        "Handles end tags."
        self.doc_handler.endElement(tag)

    def handle_xml(self,encoding,standalone):
        "Remembers whether the document is standalone."
        self.standalone= standalone=="yes"
        
    def handle_data(self,data):
        "Handles PCDATA."
        self.doc_handler.characters(data,0,len(data))

    def handle_cdata(self,data):
        "Handles CDATA marked sections."
        self.doc_handler.characters(data,0,len(data))       

    def syntax_error(self, message):
        "Handles non-fatal errors."
        self.err_handler.error(saxlib.SAXException(message,None))
    
        
# --- SGMLParsers
        
class SGMLParsers(LibParser):
    "Common code for the sgmllib and htmllib parsers."
    
    def handle_pi(self,data):
        "Handles processing instructions."
        # Should we try to parse out the name if there is one?
        self.doc_handler.processingInstruction("",data)

    # handle_starttag is never called!
        
    def unknown_starttag(self,tag,attributes):
        "Handles start tags."
        attrs={}
        for (a,v) in attributes:
            attrs[a]=v
        
        self.doc_handler.startElement(tag,saxutils.AttributeMap(attrs))

    def handle_endtag(self,tag,method):
        "Handles end tags."
        self.doc_handler.endElement(tag)
        
    def unknown_entityref(self,name):
        "Handles entity references by throwing an error."
        self.err_handler.fatalError(saxlib.SAXException("Reference to unknown entity "
                                                 "'%s'" % name,None))

    def unknown_charref(self,no):
        "Handles non-ASCII character references."
        self.err_handler.fatalError(saxlib.SAXException("Reference to unknown character '%d'" % no,None))
        
    def report_unbalanced(self,gi):
        "Reports unbalanced tags."
        self.err_handler.fatalError(saxlib.SAXException("Unbalanced end tag for '%s'" % gi,None))
        
    def _can_locate(self):
        "Internal: returns true if location info is available."
        return 0
