from . import _inputstream
from . import _tokenizer

from . import treebuilders
from .treebuilders.base import Marker

from . import _utils
from .constants import (
    spaceCharacters, asciiUpper2Lower,
    specialElements, headingElements, cdataElements, rcdataElements,
    tokenTypes,
    namespaces,
    htmlIntegrationPointElements, mathmlTextIntegrationPointElements,
    adjustForeignAttributes as adjustForeignAttributesMap,
    adjustMathMLAttributes, adjustSVGAttributes,
    E,
    _ReparseException
)


def parse(doc, treebuilder="etree", namespaceHTMLElements=True, **kwargs):
    """Parse an HTML document as a string or file-like object into a tree

    A fresh :class:`HTMLParser` is created for every call.

    :arg doc: the document to parse as a string or file-like object

    :arg treebuilder: name of the treebuilder to use, looked up with
        ``treebuilders.getTreeBuilder``

    :arg namespaceHTMLElements: whether or not to namespace HTML elements

    :arg kwargs: passed through unchanged to :meth:`HTMLParser.parse`

    :returns: parsed tree

    Example:

    >>> from html5rdf.html5parser import parse
    >>> parse('<html><body><p>This is a doc</p></body></html>')
    <Element u'{http://www.w3.org/1999/xhtml}html' at 0x7feac4909db0>

    """
    builder = treebuilders.getTreeBuilder(treebuilder)
    parser = HTMLParser(builder, namespaceHTMLElements=namespaceHTMLElements)
    return parser.parse(doc, **kwargs)


def parseFragment(doc, container="div", treebuilder="etree", namespaceHTMLElements=True, **kwargs):
    """Parse an HTML fragment as a string or file-like object into a tree

    A fresh :class:`HTMLParser` is created for every call.

    :arg doc: the fragment to parse as a string or file-like object

    :arg container: the container context to parse the fragment in

    :arg treebuilder: name of the treebuilder to use, looked up with
        ``treebuilders.getTreeBuilder``

    :arg namespaceHTMLElements: whether or not to namespace HTML elements

    :arg kwargs: passed through unchanged to :meth:`HTMLParser.parseFragment`

    :returns: parsed tree

    Example:

    >>> from html5rdf.html5parser import parseFragment
    >>> parseFragment('<b>this is a fragment</b>')
    <Element u'DOCUMENT_FRAGMENT' at 0x7feac484b090>

    """
    builder = treebuilders.getTreeBuilder(treebuilder)
    parser = HTMLParser(builder, namespaceHTMLElements=namespaceHTMLElements)
    return parser.parseFragment(doc, container=container, **kwargs)


class HTMLParser:
    """HTML parser

    Generates a tree structure from a stream of (possibly malformed) HTML.

    """

    def __init__(self, tree=None, strict=False, namespaceHTMLElements=True, debug=False):
        """
        :arg tree: a treebuilder class controlling the type of tree that will be
            returned, or the name of one as a string. ``None`` selects the
            "etree" builder. Names are resolved with
            ``treebuilders.getTreeBuilder(treeType)``

        :arg strict: raise an exception when a parse error is encountered

        :arg namespaceHTMLElements: whether or not to namespace HTML elements

        :arg debug: whether or not to enable debug mode which logs things

        Example:

        >>> from html5rdf.html5parser import HTMLParser
        >>> parser = HTMLParser()                     # generates parser with etree builder
        >>> parser = HTMLParser('lxml', strict=True)  # generates parser with lxml builder which is strict

        """

        # Raise an exception on the first error encountered
        self.strict = strict
        self.debug = debug

        # Anything that is neither None nor a string is used as the
        # treebuilder class directly.
        if tree is None:
            builderClass = treebuilders.getTreeBuilder("etree")
        elif isinstance(tree, str):
            builderClass = treebuilders.getTreeBuilder(tree)
        else:
            builderClass = tree

        self.tree = builderClass(namespaceHTMLElements)
        self.errors = []

        # One instance of every phase class, all sharing this parser and tree
        phaseInstances = {}
        for phaseName, phaseClass in _phases.items():
            phaseInstances[phaseName] = phaseClass(self, self.tree)
        self.phases = phaseInstances

    def _parse(self, stream, innerHTML=False, container="div", scripting=False, **kwargs):
        """Tokenize *stream* and run the tree-construction loop over it

        :arg stream: input handed unchanged to ``_tokenizer.HTMLTokenizer``

        :arg innerHTML: stored as ``self.innerHTMLMode``; when true,
            :meth:`reset` prepares the parser for fragment parsing

        :arg container: stored as ``self.container``; :meth:`reset` reads it
            only when ``innerHTML`` is true

        :arg scripting: stored as ``self.scripting``

        :arg kwargs: forwarded to ``_tokenizer.HTMLTokenizer``

        """

        self.innerHTMLMode = innerHTML
        self.container = container
        self.scripting = scripting
        self.tokenizer = _tokenizer.HTMLTokenizer(stream, parser=self, **kwargs)
        self.reset()

        try:
            self.mainLoop()
        except _ReparseException:
            # Start again from a clean state and run the loop once more.  The
            # retry sits outside the ``try``, so a second _ReparseException
            # propagates to the caller.
            self.reset()
            self.mainLoop()

    def reset(self):
        """Put the tree and the parser state back to their starting values

        In fragment mode (``self.innerHTMLMode`` true) the lower-cased
        container name is kept in ``self.innerHTML``, the tokenizer state is
        chosen from it, a root element is inserted and the insertion mode is
        reset.  Otherwise ``self.innerHTML`` is ``False`` and parsing starts
        in the "initial" phase.
        """
        self.tree.reset()
        self.firstStartTag = False
        self.errors = []
        self.log = []  # only used with debug mode
        # "quirks" / "limited quirks" / "no quirks"
        self.compatMode = "no quirks"

        if not self.innerHTMLMode:
            self.innerHTML = False  # pylint:disable=redefined-variable-type
            self.phase = self.phases["initial"]
        else:
            contextName = self.container.lower()
            self.innerHTML = contextName

            # For any other context element the tokenizer is left alone: it
            # already is in the data state.
            if contextName in cdataElements:
                self.tokenizer.state = self.tokenizer.rcdataState
            elif contextName in rcdataElements:
                self.tokenizer.state = self.tokenizer.rawtextState
            elif contextName == 'plaintext':
                self.tokenizer.state = self.tokenizer.plaintextState

            self.phase = self.phases["beforeHtml"]
            self.phase.insertHtmlElement()
            self.resetInsertionMode()

        self.lastPhase = None
        self.beforeRCDataPhase = None
        self.framesetOK = True

    @property
    def documentEncoding(self):
        """Name of the character encoding that was used to decode the input stream, or
        :obj:`None` if that is not determined yet (no tokenizer has been
        attached to this parser)

        """
        try:
            tokenizer = self.tokenizer
        except AttributeError:
            return None
        return tokenizer.stream.charEncoding[0].name

    def isHTMLIntegrationPoint(self, element):
        """Tell whether *element* is an HTML integration point

        A MathML ``annotation-xml`` element qualifies only when it has an
        ``encoding`` attribute equal, ignoring ASCII case, to ``text/html`` or
        ``application/xhtml+xml``.  Any other element qualifies when its
        ``(namespace, name)`` pair is in ``htmlIntegrationPointElements``.
        """
        isAnnotationXml = (element.name == "annotation-xml" and
                           element.namespace == namespaces["mathml"])
        if not isAnnotationXml:
            return (element.namespace, element.name) in htmlIntegrationPointElements

        attributes = element.attributes
        if "encoding" not in attributes:
            return False
        encoding = attributes["encoding"].translate(asciiUpper2Lower)
        return encoding in ("text/html", "application/xhtml+xml")

    def isMathMLTextIntegrationPoint(self, element):
        """Return whether *element*'s ``(namespace, name)`` pair is listed in
        ``mathmlTextIntegrationPointElements``
        """
        return (element.namespace, element.name) in mathmlTextIntegrationPointElements

    def mainLoop(self):
        """Feed every token from ``self.tokenizer`` to a phase, then handle EOF

        Each token is dispatched either to the current phase or to the
        "inForeignContent" phase.  A handler that returns a token has that
        token processed again; a handler that returns ``None`` finishes it.
        ParseError tokens are recorded with :meth:`parseError` and are not
        dispatched to a phase.
        """
        CharactersToken = tokenTypes["Characters"]
        SpaceCharactersToken = tokenTypes["SpaceCharacters"]
        StartTagToken = tokenTypes["StartTag"]
        EndTagToken = tokenTypes["EndTag"]
        CommentToken = tokenTypes["Comment"]
        DoctypeToken = tokenTypes["Doctype"]
        ParseErrorToken = tokenTypes["ParseError"]

        # Reverse lookup (type value -> type name); only read for the debug log
        type_names = {value: key for key, value in tokenTypes.items()}
        debug = self.debug

        for token in self.tokenizer:
            prev_token = None
            new_token = token
            # Keep going for as long as a handler hands a token back
            while new_token is not None:
                prev_token = new_token
                currentNode = self.tree.openElements[-1] if self.tree.openElements else None
                currentNodeNamespace = currentNode.namespace if currentNode else None
                currentNodeName = currentNode.name if currentNode else None

                type = new_token["type"]

                if type == ParseErrorToken:
                    self.parseError(new_token["data"], new_token.get("datavars", {}))
                    new_token = None
                else:
                    # The current phase handles the token when the stack of
                    # open elements is empty, when the current node is in the
                    # default namespace, or at one of the integration points
                    # tested below; everything else is foreign content.
                    if (len(self.tree.openElements) == 0 or
                        currentNodeNamespace == self.tree.defaultNamespace or
                        (self.isMathMLTextIntegrationPoint(currentNode) and
                         ((type == StartTagToken and
                           token["name"] not in frozenset(["mglyph", "malignmark"])) or
                          type in (CharactersToken, SpaceCharactersToken))) or
                        (currentNodeNamespace == namespaces["mathml"] and
                         currentNodeName == "annotation-xml" and
                         type == StartTagToken and
                         token["name"] == "svg") or
                        (self.isHTMLIntegrationPoint(currentNode) and
                         type in (StartTagToken, CharactersToken, SpaceCharactersToken))):
                        phase = self.phase
                    else:
                        phase = self.phases["inForeignContent"]

                    if debug:
                        info = {"type": type_names[type]}
                        if type in (StartTagToken, EndTagToken):
                            info["name"] = new_token['name']

                        self.log.append((self.tokenizer.state.__name__,
                                         self.phase.__class__.__name__,
                                         phase.__class__.__name__,
                                         "process" + info["type"],
                                         info))

                    if type == CharactersToken:
                        new_token = phase.processCharacters(new_token)
                    elif type == SpaceCharactersToken:
                        new_token = phase.processSpaceCharacters(new_token)
                    elif type == StartTagToken:
                        new_token = phase.processStartTag(new_token)
                    elif type == EndTagToken:
                        new_token = phase.processEndTag(new_token)
                    elif type == CommentToken:
                        new_token = phase.processComment(new_token)
                    elif type == DoctypeToken:
                        new_token = phase.processDoctype(new_token)

            # ``type`` and ``prev_token`` still describe the last pass through
            # the inner loop: report a self-closing start tag that no handler
            # acknowledged.
            if (type == StartTagToken and prev_token["selfClosing"] and
                    not prev_token["selfClosingAcknowledged"]):
                self.parseError("non-void-element-with-trailing-solidus",
                                {"name": prev_token["name"]})

        # When the loop finishes it's EOF
        reprocess = True
        phases = []
        while reprocess:
            phases.append(self.phase)
            reprocess = self.phase.processEOF()
            if reprocess:
                # A phase that asks for EOF to be reprocessed must have moved
                # the parser to a phase not visited yet, or this would never end
                assert self.phase not in phases

    def parse(self, stream, *args, **kwargs):
        """Parse a HTML document into a well-formed tree

        :arg stream: a file-like object or string containing the HTML to be parsed

            The optional encoding parameter must be a string that indicates
            the encoding.  If specified, that encoding will be used,
            regardless of any BOM or later declaration (such as in a meta
            element).

        :arg scripting: treat noscript elements as if JavaScript was turned on

        :arg args: extra positional arguments are placed after the
            ``innerHTML`` and ``container`` values passed to ``_parse``, so
            the first one is taken as ``scripting``

        :arg kwargs: forwarded to ``_parse``

        :returns: parsed tree

        Example:

        >>> from html5rdf.html5parser import HTMLParser
        >>> parser = HTMLParser()
        >>> parser.parse('<html><body><p>This is a doc</p></body></html>')
        <Element u'{http://www.w3.org/1999/xhtml}html' at 0x7feac4909db0>

        """
        # innerHTML=False, container=None: whole-document mode
        self._parse(stream, False, None, *args, **kwargs)
        return self.tree.getDocument()

    def parseFragment(self, stream, *args, **kwargs):
        """Parse a HTML fragment into a well-formed tree fragment

        :arg container: name of the element we're setting the innerHTML
            property; if omitted or set to None, defaults to 'div'.  It may
            be given by keyword or as the first extra positional argument

        :arg stream: a file-like object or string containing the HTML to be parsed

            The optional encoding parameter must be a string that indicates
            the encoding.  If specified, that encoding will be used,
            regardless of any BOM or later declaration (such as in a meta
            element)

        :arg scripting: treat noscript elements as if JavaScript was turned on

        :returns: parsed tree

        Example:

        >>> from html5rdf.html5parser import HTMLParser
        >>> parser = HTMLParser()
        >>> parser.parseFragment('<b>this is a fragment</b>')
        <Element u'DOCUMENT_FRAGMENT' at 0x7feac484b090>

        """
        # ``container`` is the parameter of ``_parse`` that follows the two
        # values supplied below, so it can arrive positionally or by keyword.
        # ``reset`` lower-cases it in fragment mode, which fails on None, so
        # the documented None -> 'div' default is applied here.
        if args:
            if args[0] is None:
                args = ("div",) + args[1:]
        elif kwargs.get("container") is None:
            kwargs["container"] = "div"

        self._parse(stream, True, *args, **kwargs)
        return self.tree.getFragment()

    def parseError(self, errorcode="XXX-undefined-error", datavars=None):
        """Record a parse error and, in strict mode, raise it

        :arg errorcode: key of the error; used to look up the message in ``E``
            when raising

        :arg datavars: mapping used to fill in the message; ``None`` means an
            empty dict

        :raises ParseError: when ``self.strict`` is true

        The error is appended to ``self.errors`` as a
        ``(position, errorcode, datavars)`` tuple before anything is raised.
        """
        # XXX The idea is to make errorcode mandatory.
        details = {} if datavars is None else datavars
        position = self.tokenizer.stream.position()
        self.errors.append((position, errorcode, details))
        if not self.strict:
            return
        raise ParseError(E[errorcode] % details)

    def adjustMathMLAttributes(self, token):
        """Pass *token* and the ``adjustMathMLAttributes`` table from
        ``constants`` to the module-level ``adjust_attributes`` helper
        """
        adjust_attributes(token, adjustMathMLAttributes)

    def adjustSVGAttributes(self, token):
        """Pass *token* and the ``adjustSVGAttributes`` table from
        ``constants`` to the module-level ``adjust_attributes`` helper
        """
        adjust_attributes(token, adjustSVGAttributes)

    def adjustForeignAttributes(self, token):
        """Pass *token* and the ``adjustForeignAttributes`` table from
        ``constants`` (imported here as ``adjustForeignAttributesMap``) to the
        module-level ``adjust_attributes`` helper
        """
        adjust_attributes(token, adjustForeignAttributesMap)

    def reparseTokenNormal(self, token):
        """Call ``self.parser.phase()``; *token* is not used

        NOTE(review): nothing visible in this class assigns ``self.parser``,
        so this looks like it would raise AttributeError if it were ever
        called - confirm whether the method is still needed.
        """
        # pylint:disable=unused-argument
        self.parser.phase()

    def resetInsertionMode(self):
        """Choose ``self.phase`` from the stack of open elements

        The stack is walked from the most recently opened element down to the
        root.  The root element is treated as if it were named
        ``self.innerHTML``; elements outside the default namespace are
        skipped, except for the root.  The first element whose name has an
        entry in the table below decides the phase; reaching the root without
        a match selects "inBody".
        """
        # The name of this method is mostly historical. (It's also used in the
        # specification.)
        phaseNameFor = {
            "select": "inSelect",
            "td": "inCell",
            "th": "inCell",
            "tr": "inRow",
            "tbody": "inTableBody",
            "thead": "inTableBody",
            "tfoot": "inTableBody",
            "caption": "inCaption",
            "colgroup": "inColumnGroup",
            "table": "inTable",
            "head": "inBody",
            "body": "inBody",
            "frameset": "inFrameset",
            "html": "beforeHead"
        }
        # Names that should only turn up in the innerHTML case
        fragmentOnlyNames = ("select", "colgroup", "head", "html")

        tree = self.tree
        atRoot = False
        for element in tree.openElements[::-1]:
            chosen = None
            elementName = element.name
            if element == tree.openElements[0]:
                assert self.innerHTML
                atRoot = True
                elementName = self.innerHTML

            if elementName in fragmentOnlyNames:
                assert self.innerHTML

            if atRoot or element.namespace == tree.defaultNamespace:
                if elementName in phaseNameFor:
                    chosen = self.phases[phaseNameFor[elementName]]
                    break
                if atRoot:
                    chosen = self.phases["inBody"]
                    break

        self.phase = chosen

    def parseRCDataRawtext(self, token, contentType):
        """Generic RCDATA/RAWTEXT parsing algorithm

        Inserts an element for *token*, switches the tokenizer to the state
        named by *contentType* (``"RAWTEXT"`` or ``"RCDATA"``), remembers the
        current phase in ``self.originalPhase`` and moves to the "text" phase.
        """
        assert contentType in ("RAWTEXT", "RCDATA")

        self.tree.insertElement(token)

        tokenizer = self.tokenizer
        tokenizer.state = (tokenizer.rawtextState if contentType == "RAWTEXT"
                           else tokenizer.rcdataState)

        self.originalPhase = self.phase
        self.phase = self.phases["text"]


class Phase:
    """Base class for helper object that implements each phase of processing

    Start and end tags are dispatched by tag name through the
    ``startTagHandler`` / ``endTagHandler`` attributes, which this base class
    reads but does not define.  Every instance keeps two small caches of the
    handlers it has already looked up.
    """
    __slots__ = ("parser", "tree", "__startTagCache", "__endTagCache")

    def __init__(self, parser, tree):
        self.parser = parser
        self.tree = tree
        self.__startTagCache = {}
        self.__endTagCache = {}

    def processEOF(self):
        """Handle end of input; every concrete phase has to provide this"""
        raise NotImplementedError

    def processComment(self, token):
        """Insert the comment under the current (last opened) element"""
        # For most phases the following is correct. Where it's not it will be
        # overridden.
        currentNode = self.tree.openElements[-1]
        self.tree.insertComment(token, currentNode)

    def processDoctype(self, token):
        """Report a doctype token as a parse error"""
        self.parser.parseError("unexpected-doctype")

    def processCharacters(self, token):
        """Insert the token's text into the tree"""
        self.tree.insertText(token["data"])

    def processSpaceCharacters(self, token):
        """Insert the token's whitespace into the tree"""
        self.tree.insertText(token["data"])

    def processStartTag(self, token):
        """Run the start tag handler registered for the token's name

        :returns: whatever the handler returns
        """
        # Note the caching is done here rather than BoundMethodDispatcher as doing it there
        # requires a circular reference to the Phase, and this ends up with a significant
        # (CPython 3.8) GC cost when parsing many short inputs
        tagName = token["name"]
        cache = self.__startTagCache
        # In Py2, using `in` is quicker in general than try/except KeyError
        # In Py3, `in` is quicker when there are few cache hits (typically short inputs)
        if tagName in cache:
            return cache[tagName](token)

        dispatcher = self.startTagHandler
        handler = cache[tagName] = dispatcher[tagName]
        # bound the cache size in case we get loads of unknown tags
        sizeLimit = len(dispatcher) * 1.1
        while len(cache) > sizeLimit:
            # this makes the eviction policy random on Py < 3.7 and FIFO >= 3.7
            cache.pop(next(iter(cache)))
        return handler(token)

    def startTagHtml(self, token):
        """Copy attributes of an ``<html>`` start tag onto the root element

        Attributes the root element already has are left untouched.
        """
        if not self.parser.firstStartTag and token["name"] == "html":
            self.parser.parseError("non-html-root")
        # XXX Need a check here to see if the first start tag token emitted is
        # this token... If it's not, invoke self.parser.parseError().
        for attrName, attrValue in token["data"].items():
            rootAttributes = self.tree.openElements[0].attributes
            if attrName not in rootAttributes:
                rootAttributes[attrName] = attrValue
        self.parser.firstStartTag = False

    def processEndTag(self, token):
        """Run the end tag handler registered for the token's name

        :returns: whatever the handler returns
        """
        # Note the caching is done here rather than BoundMethodDispatcher as doing it there
        # requires a circular reference to the Phase, and this ends up with a significant
        # (CPython 3.8) GC cost when parsing many short inputs
        tagName = token["name"]
        cache = self.__endTagCache
        # In Py2, using `in` is quicker in general than try/except KeyError
        # In Py3, `in` is quicker when there are few cache hits (typically short inputs)
        if tagName in cache:
            return cache[tagName](token)

        dispatcher = self.endTagHandler
        handler = cache[tagName] = dispatcher[tagName]
        # bound the cache size in case we get loads of unknown tags
        sizeLimit = len(dispatcher) * 1.1
        while len(cache) > sizeLimit:
            # this makes the eviction policy random on Py < 3.7 and FIFO >= 3.7
            cache.pop(next(iter(cache)))
        return handler(token)


class InitialPhase(Phase):
    """Phase that deals with the doctype

    A doctype token decides the parser's ``compatMode``.  Characters, start
    tags, end tags and EOF are reported as parse errors, force quirks mode and
    are handed on for reprocessing.  In every one of these cases the parser
    moves on to the "beforeHtml" phase.
    """
    __slots__ = tuple()

    def processSpaceCharacters(self, token):
        """Whitespace is ignored in this phase"""
        pass

    def processComment(self, token):
        """Attach the comment to the document itself"""
        self.tree.insertComment(token, self.tree.document)

    def processDoctype(self, token):
        """Insert the doctype and derive the compatibility mode from it"""
        name = token["name"]
        publicId = token["publicId"]
        systemId = token["systemId"]
        correct = token["correct"]

        # Public identifier prefixes that select quirks mode
        quirksPrefixes = (
            "+//silmaril//dtd html pro v0r11 19970101//",
            "-//advasoft ltd//dtd html 3.0 aswedit + extensions//",
            "-//as//dtd html 3.0 aswedit + extensions//",
            "-//ietf//dtd html 2.0 level 1//",
            "-//ietf//dtd html 2.0 level 2//",
            "-//ietf//dtd html 2.0 strict level 1//",
            "-//ietf//dtd html 2.0 strict level 2//",
            "-//ietf//dtd html 2.0 strict//",
            "-//ietf//dtd html 2.0//",
            "-//ietf//dtd html 2.1e//",
            "-//ietf//dtd html 3.0//",
            "-//ietf//dtd html 3.2 final//",
            "-//ietf//dtd html 3.2//",
            "-//ietf//dtd html 3//",
            "-//ietf//dtd html level 0//",
            "-//ietf//dtd html level 1//",
            "-//ietf//dtd html level 2//",
            "-//ietf//dtd html level 3//",
            "-//ietf//dtd html strict level 0//",
            "-//ietf//dtd html strict level 1//",
            "-//ietf//dtd html strict level 2//",
            "-//ietf//dtd html strict level 3//",
            "-//ietf//dtd html strict//",
            "-//ietf//dtd html//",
            "-//metrius//dtd metrius presentational//",
            "-//microsoft//dtd internet explorer 2.0 html strict//",
            "-//microsoft//dtd internet explorer 2.0 html//",
            "-//microsoft//dtd internet explorer 2.0 tables//",
            "-//microsoft//dtd internet explorer 3.0 html strict//",
            "-//microsoft//dtd internet explorer 3.0 html//",
            "-//microsoft//dtd internet explorer 3.0 tables//",
            "-//netscape comm. corp.//dtd html//",
            "-//netscape comm. corp.//dtd strict html//",
            "-//o'reilly and associates//dtd html 2.0//",
            "-//o'reilly and associates//dtd html extended 1.0//",
            "-//o'reilly and associates//dtd html extended relaxed 1.0//",
            "-//softquad software//dtd hotmetal pro 6.0::19990601::extensions to html 4.0//",
            "-//softquad//dtd hotmetal pro 4.0::19971010::extensions to html 4.0//",
            "-//spyglass//dtd html 2.0 extended//",
            "-//sq//dtd html 2.0 hotmetal + extensions//",
            "-//sun microsystems corp.//dtd hotjava html//",
            "-//sun microsystems corp.//dtd hotjava strict html//",
            "-//w3c//dtd html 3 1995-03-24//",
            "-//w3c//dtd html 3.2 draft//",
            "-//w3c//dtd html 3.2 final//",
            "-//w3c//dtd html 3.2//",
            "-//w3c//dtd html 3.2s draft//",
            "-//w3c//dtd html 4.0 frameset//",
            "-//w3c//dtd html 4.0 transitional//",
            "-//w3c//dtd html experimental 19960712//",
            "-//w3c//dtd html experimental 970421//",
            "-//w3c//dtd w3 html//",
            "-//w3o//dtd w3 html 3.0//",
            "-//webtechs//dtd mozilla html 2.0//",
            "-//webtechs//dtd mozilla html//",
        )
        # Public identifiers that select quirks mode on an exact match
        quirksExact = ("-//w3o//dtd w3 html strict 3.0//en//",
                       "-/w3c/dtd html 4.0 transitional/en",
                       "html")
        # Quirks without a system identifier, limited quirks with one
        html401Prefixes = ("-//w3c//dtd html 4.01 frameset//",
                           "-//w3c//dtd html 4.01 transitional//")
        limitedQuirksPrefixes = ("-//w3c//dtd xhtml 1.0 frameset//",
                                 "-//w3c//dtd xhtml 1.0 transitional//")

        if (name != "html" or publicId is not None or
                (systemId is not None and systemId != "about:legacy-compat")):
            self.parser.parseError("unknown-doctype")

        if publicId is None:
            publicId = ""

        self.tree.insertDoctype(token)

        # Comparisons below are done on the ASCII-lowercased identifier
        if publicId != "":
            publicId = publicId.translate(asciiUpper2Lower)

        isHtml401 = publicId.startswith(html401Prefixes)
        fullQuirks = (
            not correct or
            token["name"] != "html" or
            publicId.startswith(quirksPrefixes) or
            publicId in quirksExact or
            (isHtml401 and systemId is None) or
            (systemId and
             systemId.lower() == "http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd")
        )
        if fullQuirks:
            self.parser.compatMode = "quirks"
        elif (publicId.startswith(limitedQuirksPrefixes) or
              (isHtml401 and systemId is not None)):
            self.parser.compatMode = "limited quirks"

        self.parser.phase = self.parser.phases["beforeHtml"]

    def anythingElse(self):
        """No doctype was found: force quirks mode and move on"""
        parser = self.parser
        parser.compatMode = "quirks"
        parser.phase = parser.phases["beforeHtml"]

    def processCharacters(self, token):
        """Report the missing doctype; the token is returned for reprocessing"""
        self.parser.parseError("expected-doctype-but-got-chars")
        self.anythingElse()
        return token

    def processStartTag(self, token):
        """Report the missing doctype; the token is returned for reprocessing"""
        tagInfo = {"name": token["name"]}
        self.parser.parseError("expected-doctype-but-got-start-tag", tagInfo)
        self.anythingElse()
        return token

    def processEndTag(self, token):
        """Report the missing doctype; the token is returned for reprocessing"""
        tagInfo = {"name": token["name"]}
        self.parser.parseError("expected-doctype-but-got-end-tag", tagInfo)
        self.anythingElse()
        return token

    def processEOF(self):
        """Report the missing doctype and ask for EOF to be reprocessed"""
        self.parser.parseError("expected-doctype-but-got-eof")
        self.anythingElse()
        return True


class BeforeHtmlPhase(Phase):
    """Phase that creates the root ``html`` element

    Apart from comments, whitespace and unexpected end tags, every token
    causes an implied root element to be inserted and is then handed back for
    reprocessing in the "beforeHead" phase.
    """
    __slots__ = tuple()

    # helper methods
    def insertHtmlElement(self):
        """Insert an implied ``html`` root and switch to "beforeHead" """
        rootToken = impliedTagToken("html", "StartTag")
        self.tree.insertRoot(rootToken)
        self.parser.phase = self.parser.phases["beforeHead"]

    # other
    def processEOF(self):
        """Create the root and ask for EOF to be reprocessed"""
        self.insertHtmlElement()
        return True

    def processComment(self, token):
        """Attach the comment to the document itself"""
        self.tree.insertComment(token, self.tree.document)

    def processSpaceCharacters(self, token):
        """Whitespace is ignored in this phase"""
        pass

    def processCharacters(self, token):
        """Create the root; the token is returned for reprocessing"""
        self.insertHtmlElement()
        return token

    def processStartTag(self, token):
        """Create the root; the token is returned for reprocessing

        An ``html`` start tag is remembered in ``parser.firstStartTag``.
        """
        if token["name"] == "html":
            self.parser.firstStartTag = True
        self.insertHtmlElement()
        return token

    def processEndTag(self, token):
        """Only ``head``, ``body``, ``html`` and ``br`` end tags imply a root

        Any other end tag is reported as a parse error and dropped.
        """
        if token["name"] in ("head", "body", "html", "br"):
            self.insertHtmlElement()
            return token
        self.parser.parseError("unexpected-end-tag-before-html",
                               {"name": token["name"]})


class BeforeHeadPhase(Phase):
    """Phase that opens the ``head`` element

    A ``head`` start tag is inserted as is; most other tokens first insert an
    implied ``head`` and are then handed back for reprocessing in the
    "inHead" phase.
    """
    __slots__ = tuple()

    def _insertImpliedHead(self):
        """Behave as if a ``head`` start tag had been seen"""
        self.startTagHead(impliedTagToken("head", "StartTag"))

    def processEOF(self):
        """Imply a ``head`` and ask for EOF to be reprocessed"""
        self._insertImpliedHead()
        return True

    def processSpaceCharacters(self, token):
        """Whitespace is ignored in this phase"""
        pass

    def processCharacters(self, token):
        """Imply a ``head``; the token is returned for reprocessing"""
        self._insertImpliedHead()
        return token

    def startTagHtml(self, token):
        """Delegate to the "inBody" phase"""
        inBody = self.parser.phases["inBody"]
        return inBody.processStartTag(token)

    def startTagHead(self, token):
        """Insert the element, remember it as the head pointer, go "inHead" """
        tree = self.tree
        tree.insertElement(token)
        tree.headPointer = tree.openElements[-1]
        self.parser.phase = self.parser.phases["inHead"]

    def startTagOther(self, token):
        """Imply a ``head``; the token is returned for reprocessing"""
        self._insertImpliedHead()
        return token

    def endTagImplyHead(self, token):
        """Imply a ``head``; the token is returned for reprocessing"""
        self._insertImpliedHead()
        return token

    def endTagOther(self, token):
        """Report the end tag as a parse error and drop it"""
        tagInfo = {"name": token["name"]}
        self.parser.parseError("end-tag-after-implied-root", tagInfo)

    startTagHandler = _utils.MethodDispatcher([
        ("html", startTagHtml),
        ("head", startTagHead),
    ])
    startTagHandler.default = startTagOther

    endTagHandler = _utils.MethodDispatcher([
        (("head", "body", "html", "br"), endTagImplyHead),
    ])
    endTagHandler.default = endTagOther


class InHeadPhase(Phase):
    """Phase for the content of the ``head`` element

    Tokens without a dedicated handler close the ``head`` (switching to the
    "afterHead" phase) and are handed back for reprocessing.
    """
    __slots__ = tuple()

    # the real thing
    def processEOF(self):
        """Close the head and ask for EOF to be reprocessed"""
        self.anythingElse()
        return True

    def processCharacters(self, token):
        """Close the head; the token is returned for reprocessing"""
        self.anythingElse()
        return token

    def startTagHtml(self, token):
        """Delegate to the "inBody" phase"""
        inBody = self.parser.phases["inBody"]
        return inBody.processStartTag(token)

    def startTagHead(self, token):
        """A second ``head`` start tag is a parse error and is dropped"""
        self.parser.parseError("two-heads-are-not-better-than-one")

    def startTagBaseLinkCommand(self, token):
        """Insert the element and take it straight off the stack again"""
        tree = self.tree
        tree.insertElement(token)
        tree.openElements.pop()
        token["selfClosingAcknowledged"] = True

    def startTagMeta(self, token):
        """Insert the element and let it change a tentative encoding

        The encoding comes from a ``charset`` attribute or, failing that,
        from the ``content`` attribute of an ``http-equiv="content-type"``
        declaration.
        """
        tree = self.tree
        tree.insertElement(token)
        tree.openElements.pop()
        token["selfClosingAcknowledged"] = True

        attributes = token["data"]
        stream = self.parser.tokenizer.stream
        if stream.charEncoding[1] != "tentative":
            return

        if "charset" in attributes:
            stream.changeEncoding(attributes["charset"])
            return

        isContentType = ("content" in attributes and
                         "http-equiv" in attributes and
                         attributes["http-equiv"].lower() == "content-type")
        if isContentType:
            # Encoding it as UTF-8 here is a hack, as really we should pass
            # the abstract Unicode string, and just use the
            # ContentAttrParser on that, but using UTF-8 allows all chars
            # to be encoded and as a ASCII-superset works.
            contentBytes = _inputstream.EncodingBytes(attributes["content"].encode("utf-8"))
            codec = _inputstream.ContentAttrParser(contentBytes).parse()
            stream.changeEncoding(codec)

    def startTagTitle(self, token):
        """Parse the element content as RCDATA"""
        self.parser.parseRCDataRawtext(token, "RCDATA")

    def startTagNoFramesStyle(self, token):
        """Parse the element content as RAWTEXT"""
        # Need to decide whether to implement the scripting-disabled case
        self.parser.parseRCDataRawtext(token, "RAWTEXT")

    def startTagNoscript(self, token):
        """RAWTEXT when scripting is on, otherwise go "inHeadNoscript" """
        parser = self.parser
        if parser.scripting:
            parser.parseRCDataRawtext(token, "RAWTEXT")
            return
        self.tree.insertElement(token)
        parser.phase = parser.phases["inHeadNoscript"]

    def startTagScript(self, token):
        """Insert the element and read its content in the "text" phase"""
        self.tree.insertElement(token)
        parser = self.parser
        parser.tokenizer.state = parser.tokenizer.scriptDataState
        parser.originalPhase = parser.phase
        parser.phase = parser.phases["text"]

    def startTagOther(self, token):
        """Close the head; the token is returned for reprocessing"""
        self.anythingElse()
        return token

    def endTagHead(self, token):
        """Pop the ``head`` element and switch to "afterHead" """
        popped = self.parser.tree.openElements.pop()
        assert popped.name == "head", "Expected head got %s" % popped.name
        self.parser.phase = self.parser.phases["afterHead"]

    def endTagHtmlBodyBr(self, token):
        """Close the head; the token is returned for reprocessing"""
        self.anythingElse()
        return token

    def endTagOther(self, token):
        """Report the end tag as a parse error and drop it"""
        tagInfo = {"name": token["name"]}
        self.parser.parseError("unexpected-end-tag", tagInfo)

    def anythingElse(self):
        """Behave as if a ``head`` end tag had been seen"""
        self.endTagHead(impliedTagToken("head"))

    startTagHandler = _utils.MethodDispatcher([
        ("html", startTagHtml),
        ("title", startTagTitle),
        (("noframes", "style"), startTagNoFramesStyle),
        ("noscript", startTagNoscript),
        ("script", startTagScript),
        (("base", "basefont", "bgsound", "command", "link"),
         startTagBaseLinkCommand),
        ("meta", startTagMeta),
        ("head", startTagHead),
    ])
    startTagHandler.default = startTagOther

    endTagHandler = _utils.MethodDispatcher([
        ("head", endTagHead),
        (("br", "html", "body"), endTagHtmlBodyBr),
    ])
    endTagHandler.default = endTagOther


class InHeadNoscriptPhase(Phase):
    """Phase for the content of a ``noscript`` element inside ``head``

    Comments, whitespace and a handful of start tags are delegated to the
    "inHead" phase.  Most other tokens are parse errors that close the
    ``noscript`` element and are handed back for reprocessing.
    """
    __slots__ = tuple()

    def _closeAfterError(self, token):
        """Report an unexpected tag, close ``noscript`` and return the token"""
        self.parser.parseError("unexpected-inhead-noscript-tag", {"name": token["name"]})
        self.anythingElse()
        return token

    def processEOF(self):
        """Report the error, close ``noscript`` and ask for EOF again"""
        self.parser.parseError("eof-in-head-noscript")
        self.anythingElse()
        return True

    def processComment(self, token):
        """Delegate to the "inHead" phase"""
        inHead = self.parser.phases["inHead"]
        return inHead.processComment(token)

    def processCharacters(self, token):
        """Report the error, close ``noscript`` and return the token"""
        self.parser.parseError("char-in-head-noscript")
        self.anythingElse()
        return token

    def processSpaceCharacters(self, token):
        """Delegate to the "inHead" phase"""
        inHead = self.parser.phases["inHead"]
        return inHead.processSpaceCharacters(token)

    def startTagHtml(self, token):
        """Delegate to the "inBody" phase"""
        inBody = self.parser.phases["inBody"]
        return inBody.processStartTag(token)

    def startTagBaseLinkCommand(self, token):
        """Delegate to the "inHead" phase"""
        inHead = self.parser.phases["inHead"]
        return inHead.processStartTag(token)

    def startTagHeadNoscript(self, token):
        """Report the start tag as a parse error and drop it"""
        tagInfo = {"name": token["name"]}
        self.parser.parseError("unexpected-start-tag", tagInfo)

    def startTagOther(self, token):
        """Unexpected start tag: close ``noscript`` and reprocess"""
        return self._closeAfterError(token)

    def endTagNoscript(self, token):
        """Pop the ``noscript`` element and go back to "inHead" """
        popped = self.parser.tree.openElements.pop()
        assert popped.name == "noscript", "Expected noscript got %s" % popped.name
        self.parser.phase = self.parser.phases["inHead"]

    def endTagBr(self, token):
        """Unexpected ``br`` end tag: close ``noscript`` and reprocess"""
        return self._closeAfterError(token)

    def endTagOther(self, token):
        """Report the end tag as a parse error and drop it"""
        tagInfo = {"name": token["name"]}
        self.parser.parseError("unexpected-end-tag", tagInfo)

    def anythingElse(self):
        """Behave as if a ``noscript`` end tag had been seen"""
        # Caller must raise parse error first!
        self.endTagNoscript(impliedTagToken("noscript"))

    startTagHandler = _utils.MethodDispatcher([
        ("html", startTagHtml),
        (("basefont", "bgsound", "link", "meta", "noframes", "style"), startTagBaseLinkCommand),
        (("head", "noscript"), startTagHeadNoscript),
    ])
    startTagHandler.default = startTagOther

    endTagHandler = _utils.MethodDispatcher([
        ("noscript", endTagNoscript),
        ("br", endTagBr),
    ])
    endTagHandler.default = endTagOther


class AfterHeadPhase(Phase):
    """Phase for tokens seen after the head, before a body or frameset opens.

    ``body`` and ``frameset`` start tags insert the element and switch
    phase. Head-only start tags are processed by the "inHead" phase with the
    head element temporarily pushed back on the stack of open elements.
    Most remaining tokens go through ``anythingElse``, which inserts an
    implied ``body`` start tag and switches to "inBody".
    """
    __slots__ = ()

    def processEOF(self):
        """Open the implied body at end of input and return ``True``."""
        self.anythingElse()
        return True

    def processCharacters(self, token):
        """Open the implied body and return the character token unchanged."""
        self.anythingElse()
        return token

    def startTagHtml(self, token):
        """Process an ``html`` start tag the way the "inBody" phase does."""
        inBody = self.parser.phases["inBody"]
        return inBody.processStartTag(token)

    def startTagBody(self, token):
        """Insert the ``body`` element, clear framesetOK, go to "inBody"."""
        parser = self.parser
        parser.framesetOK = False
        self.tree.insertElement(token)
        parser.phase = parser.phases["inBody"]

    def startTagFrameset(self, token):
        """Insert the ``frameset`` element and go to "inFrameset"."""
        parser = self.parser
        self.tree.insertElement(token)
        parser.phase = parser.phases["inFrameset"]

    def startTagFromHead(self, token):
        """Process a head-only start tag that arrived after the head.

        The head element is pushed back onto the stack of open elements,
        the token is processed by the "inHead" phase, and afterwards the
        closest ``head`` entry is removed from the stack again.
        """
        details = {"name": token["name"]}
        self.parser.parseError("unexpected-start-tag-out-of-my-head", details)
        self.tree.openElements.append(self.tree.headPointer)
        self.parser.phases["inHead"].processStartTag(token)
        stack = self.tree.openElements
        for node in reversed(stack):
            if node.name == "head":
                # Stop right after mutating the list we are iterating.
                stack.remove(node)
                break

    def startTagHead(self, token):
        """Report a second ``head`` start tag as a parse error."""
        details = {"name": token["name"]}
        self.parser.parseError("unexpected-start-tag", details)

    def startTagOther(self, token):
        """Open the implied body and return the start tag token unchanged."""
        self.anythingElse()
        return token

    def endTagHtmlBodyBr(self, token):
        """Open the implied body and return the end tag token unchanged."""
        self.anythingElse()
        return token

    def endTagOther(self, token):
        """Report any other end tag as a parse error."""
        details = {"name": token["name"]}
        self.parser.parseError("unexpected-end-tag", details)

    def anythingElse(self):
        """Insert an implied ``body`` element and switch to "inBody"."""
        parser = self.parser
        self.tree.insertElement(impliedTagToken("body", "StartTag"))
        parser.phase = parser.phases["inBody"]
        parser.framesetOK = True

    # Start tag name -> handler; startTagOther is the registered default.
    startTagHandler = _utils.MethodDispatcher([
        ("html", startTagHtml),
        ("body", startTagBody),
        ("frameset", startTagFrameset),
        (
            ("base", "basefont", "bgsound", "link", "meta", "noframes",
             "script", "style", "title"),
            startTagFromHead,
        ),
        ("head", startTagHead),
    ])
    startTagHandler.default = startTagOther

    # End tag name -> handler; endTagOther is the registered default.
    endTagHandler = _utils.MethodDispatcher([
        (("body", "html", "br"), endTagHtmlBodyBr),
    ])
    endTagHandler.default = endTagOther


class InBodyPhase(Phase):
    # http://www.whatwg.org/specs/web-apps/current-work/#parsing-main-inbody
    # the really-really-really-very crazy mode
    __slots__ = ("processSpaceCharacters",)

    def __init__(self, *args, **kwargs):
        """Initialise the phase; all arguments go to the base class."""
        super(InBodyPhase, self).__init__(*args, **kwargs)
        # Space characters start out on the ordinary (non-<pre>) handler;
        # some start tag handlers swap in the newline-dropping variant.
        self.processSpaceCharacters = self.processSpaceCharactersNonPre

    def isMatchingFormattingElement(self, node1, node2):
        """Tell whether two nodes have equal name, namespace and attributes.

        :arg node1: first node to compare

        :arg node2: second node to compare

        :returns: result of the comparison; the checks run in the order
            name, namespace, attributes and stop at the first mismatch
        """
        if node1.name != node2.name:
            return False
        if node1.namespace != node2.namespace:
            return False
        return node1.attributes == node2.attributes

    # helper
    def addFormattingElement(self, token):
        """Insert *token* and add the new element to the active formatting list.

        Entries after the last ``Marker`` that match the new element (see
        ``isMatchingFormattingElement``) are counted first. When there are
        already three of them, the one furthest from the end of the list is
        removed before the new element is appended.

        :arg token: the start tag token of the formatting element
        """
        tree = self.tree
        tree.insertElement(token)
        element = tree.openElements[-1]

        duplicates = []
        for candidate in reversed(tree.activeFormattingElements):
            if candidate is Marker:
                break
            if self.isMatchingFormattingElement(candidate, element):
                duplicates.append(candidate)

        assert len(duplicates) <= 3
        if len(duplicates) == 3:
            # The list was walked backwards, so the last match collected is
            # the earliest one in the list.
            tree.activeFormattingElements.remove(duplicates[-1])
        tree.activeFormattingElements.append(element)

    # the real deal
    def processEOF(self):
        """Report a parse error if an element that needs closing is still open.

        A single "expected-closing-tag-but-got-eof" error is raised when any
        open element has a name outside the allowed set below.
        """
        allowed_elements = frozenset((
            "dd", "dt", "li", "p",
            "tbody", "td", "tfoot", "th", "thead", "tr",
            "body", "html",
        ))
        if any(node.name not in allowed_elements
               for node in reversed(self.tree.openElements)):
            self.parser.parseError("expected-closing-tag-but-got-eof")
        # Stop parsing

    def processSpaceCharactersDropNewline(self, token):
        """Insert space characters, dropping one leading newline if needed.

        Used right after the start of ``pre``, ``listing`` and ``textarea``
        elements. It applies once: the ordinary handler is put back before
        anything else happens.

        :arg token: the space-character token
        """
        text = token["data"]
        self.processSpaceCharacters = self.processSpaceCharactersNonPre
        if text.startswith("\n"):
            current = self.tree.openElements[-1]
            if (current.name in ("pre", "listing", "textarea") and
                    not current.hasContent()):
                text = text[1:]
        if not text:
            return
        self.tree.reconstructActiveFormattingElements()
        self.tree.insertText(text)

    def processCharacters(self, token):
        """Insert character data and clear framesetOK for non-space text.

        A token whose data is exactly a single NUL character is dropped.

        :arg token: the character token
        """
        data = token["data"]
        if data == "\u0000":
            # The tokenizer should always emit null on its own
            return
        self.tree.reconstructActiveFormattingElements()
        self.tree.insertText(data)
        if not self.parser.framesetOK:
            return
        # This must be bad for performance
        for char in data:
            if char not in spaceCharacters:
                self.parser.framesetOK = False
                break

    def processSpaceCharactersNonPre(self, token):
        """Insert space characters as-is (the default handler).

        :arg token: the space-character token
        """
        tree = self.tree
        tree.reconstructActiveFormattingElements()
        tree.insertText(token["data"])

    def startTagProcessInHead(self, token):
        """Process the start tag the way the "inHead" phase does.

        :returns: whatever the "inHead" phase's ``processStartTag`` returns
        """
        inHead = self.parser.phases["inHead"]
        return inHead.processStartTag(token)

    def startTagBody(self, token):
        """Handle a stray ``body`` start tag.

        Always a parse error. When the second entry on the stack of open
        elements is a ``body`` element, framesetOK is cleared and every
        attribute of the token that the element does not already have is
        copied onto it. Otherwise only ``parser.innerHTML`` is asserted.

        :arg token: the ``body`` start tag token
        """
        self.parser.parseError("unexpected-start-tag", {"name": "body"})
        stack = self.tree.openElements
        if len(stack) == 1 or stack[1].name != "body":
            assert self.parser.innerHTML
            return
        self.parser.framesetOK = False
        body = stack[1]
        for attr, value in token["data"].items():
            if attr not in body.attributes:
                body.attributes[attr] = value

    def startTagFrameset(self, token):
        """Handle a ``frameset`` start tag seen while in the body.

        Always a parse error. The tag is ignored unless the second open
        element is ``body`` and framesetOK is still set. In that case the
        body element is detached from its parent, the stack is popped back
        to ``html``, the frameset is inserted and the phase becomes
        "inFrameset".

        :arg token: the ``frameset`` start tag token
        """
        self.parser.parseError("unexpected-start-tag", {"name": "frameset"})
        stack = self.tree.openElements
        if len(stack) == 1 or stack[1].name != "body":
            assert self.parser.innerHTML
            return
        if not self.parser.framesetOK:
            return
        body = stack[1]
        if body.parent:
            body.parent.removeChild(body)
        while stack[-1].name != "html":
            stack.pop()
        self.tree.insertElement(token)
        self.parser.phase = self.parser.phases["inFrameset"]

    def startTagCloseP(self, token):
        """Close an open ``p`` in button scope, then insert the element.

        :arg token: the start tag token
        """
        tree = self.tree
        pIsOpen = tree.elementInScope("p", variant="button")
        if pIsOpen:
            self.endTagP(impliedTagToken("p"))
        tree.insertElement(token)

    def startTagPreListing(self, token):
        """Insert a ``pre``/``listing`` element.

        An open ``p`` in button scope is closed first. After insertion
        framesetOK is cleared and the newline-dropping space handler is
        installed for the next space-character token.

        :arg token: the start tag token
        """
        tree = self.tree
        if tree.elementInScope("p", variant="button"):
            self.endTagP(impliedTagToken("p"))
        tree.insertElement(token)
        self.parser.framesetOK = False
        self.processSpaceCharacters = self.processSpaceCharactersDropNewline

    def startTagForm(self, token):
        """Insert a ``form`` element unless a form pointer is already set.

        With a form pointer set the tag is only reported as a parse error.
        Otherwise an open ``p`` in button scope is closed, the element is
        inserted and it becomes the tree's form pointer.

        :arg token: the ``form`` start tag token
        """
        tree = self.tree
        if tree.formPointer:
            self.parser.parseError("unexpected-start-tag", {"name": "form"})
            return
        if tree.elementInScope("p", variant="button"):
            self.endTagP(impliedTagToken("p"))
        tree.insertElement(token)
        tree.formPointer = tree.openElements[-1]

    def startTagListItem(self, token):
        """Insert an ``li``, ``dd`` or ``dt`` element.

        The stack of open elements is walked from the top. The first open
        element that the new item implicitly closes (``li`` for ``li``;
        ``dt``/``dd`` for either of those) gets an implied end tag. The walk
        also stops at the first special element other than ``address``,
        ``div`` or ``p``. An open ``p`` in button scope is closed before the
        new element is inserted.

        :arg token: the start tag token
        """
        self.parser.framesetOK = False

        definitionItems = ("dt", "dd")
        stopNames = {"li": ("li",),
                     "dt": definitionItems,
                     "dd": definitionItems}[token["name"]]
        for node in reversed(self.tree.openElements):
            if node.name in stopNames:
                impliedEnd = impliedTagToken(node.name, "EndTag")
                self.parser.phase.processEndTag(impliedEnd)
                break
            isBarrier = (node.nameTuple in specialElements and
                         node.name not in ("address", "div", "p"))
            if isBarrier:
                break

        if self.tree.elementInScope("p", variant="button"):
            self.parser.phase.processEndTag(impliedTagToken("p", "EndTag"))

        self.tree.insertElement(token)

    def startTagPlaintext(self, token):
        """Insert a ``plaintext`` element and switch the tokenizer state.

        An open ``p`` in button scope is closed first. The tokenizer is then
        put into its ``plaintextState``.

        :arg token: the ``plaintext`` start tag token
        """
        tree = self.tree
        if tree.elementInScope("p", variant="button"):
            self.endTagP(impliedTagToken("p"))
        tree.insertElement(token)
        tokenizer = self.parser.tokenizer
        tokenizer.state = tokenizer.plaintextState

    def startTagHeading(self, token):
        """Insert a heading element.

        An open ``p`` in button scope is closed first. If the current node
        is itself a heading, that is a parse error and the node is popped
        before the new heading is inserted.

        :arg token: the heading start tag token
        """
        tree = self.tree
        if tree.elementInScope("p", variant="button"):
            self.endTagP(impliedTagToken("p"))
        currentIsHeading = tree.openElements[-1].name in headingElements
        if currentIsHeading:
            self.parser.parseError("unexpected-start-tag", {"name": token["name"]})
            tree.openElements.pop()
        tree.insertElement(token)

    def startTagA(self, token):
        """Insert an ``a`` element, closing a still-active earlier one first.

        When an ``a`` is found in the list of active formatting elements,
        this is a parse error: an implied end tag is run through
        ``endTagFormatting`` and the old element is then removed from both
        the stack of open elements and the active formatting list if it is
        still there.

        :arg token: the ``a`` start tag token
        """
        tree = self.tree
        staleAnchor = tree.elementInActiveFormattingElements("a")
        if staleAnchor:
            self.parser.parseError("unexpected-start-tag-implies-end-tag",
                                   {"startName": "a", "endName": "a"})
            self.endTagFormatting(impliedTagToken("a"))
            for elements in (tree.openElements, tree.activeFormattingElements):
                if staleAnchor in elements:
                    elements.remove(staleAnchor)
        tree.reconstructActiveFormattingElements()
        self.addFormattingElement(token)

    def startTagFormatting(self, token):
        """Reconstruct active formatting elements, then add this one.

        :arg token: the formatting start tag token
        """
        tree = self.tree
        tree.reconstructActiveFormattingElements()
        self.addFormattingElement(token)

    def startTagNobr(self, token):
        """Insert a ``nobr`` element, closing one that is already in scope.

        A ``nobr`` already in scope is a parse error; it is closed with an
        implied end tag and the active formatting elements are
        reconstructed again before the new element is added.

        :arg token: the ``nobr`` start tag token
        """
        tree = self.tree
        tree.reconstructActiveFormattingElements()
        if tree.elementInScope("nobr"):
            details = {"startName": "nobr", "endName": "nobr"}
            self.parser.parseError("unexpected-start-tag-implies-end-tag", details)
            self.processEndTag(impliedTagToken("nobr"))
            # XXX Need tests that trigger the following
            tree.reconstructActiveFormattingElements()
        self.addFormattingElement(token)

    def startTagButton(self, token):
        """Insert a ``button`` element, or close the one already in scope.

        :arg token: the ``button`` start tag token

        :returns: the token when a ``button`` was already in scope (after
            the parse error and the implied end tag), otherwise ``None``
        """
        tree = self.tree
        if tree.elementInScope("button"):
            details = {"startName": "button", "endName": "button"}
            self.parser.parseError("unexpected-start-tag-implies-end-tag", details)
            self.processEndTag(impliedTagToken("button"))
            return token
        tree.reconstructActiveFormattingElements()
        tree.insertElement(token)
        self.parser.framesetOK = False

    def startTagAppletMarqueeObject(self, token):
        """Insert the element and push a marker on the active formatting list.

        framesetOK is cleared afterwards.

        :arg token: the start tag token
        """
        tree = self.tree
        tree.reconstructActiveFormattingElements()
        tree.insertElement(token)
        tree.activeFormattingElements.append(Marker)
        self.parser.framesetOK = False

    def startTagXmp(self, token):
        """Start an ``xmp`` element parsed as RAWTEXT.

        An open ``p`` in button scope is closed, the active formatting
        elements are reconstructed and framesetOK is cleared before the
        parser's ``parseRCDataRawtext`` is called.

        :arg token: the ``xmp`` start tag token
        """
        tree = self.tree
        if tree.elementInScope("p", variant="button"):
            self.endTagP(impliedTagToken("p"))
        tree.reconstructActiveFormattingElements()
        parser = self.parser
        parser.framesetOK = False
        parser.parseRCDataRawtext(token, "RAWTEXT")

    def startTagTable(self, token):
        """Insert a ``table`` element and switch to the "inTable" phase.

        Unless the parser's compatMode is "quirks", an open ``p`` in button
        scope is closed first. framesetOK is cleared.

        :arg token: the ``table`` start tag token
        """
        parser = self.parser
        if (parser.compatMode != "quirks" and
                self.tree.elementInScope("p", variant="button")):
            self.processEndTag(impliedTagToken("p"))
        self.tree.insertElement(token)
        parser.framesetOK = False
        parser.phase = parser.phases["inTable"]

    def startTagVoidFormatting(self, token):
        """Insert a void element and pop it straight off the stack.

        The token's self-closing flag is acknowledged and framesetOK is
        cleared.

        :arg token: the start tag token
        """
        tree = self.tree
        tree.reconstructActiveFormattingElements()
        tree.insertElement(token)
        tree.openElements.pop()
        token["selfClosingAcknowledged"] = True
        self.parser.framesetOK = False

    def startTagInput(self, token):
        """Insert an ``input`` element via ``startTagVoidFormatting``.

        When the token has a ``type`` attribute equal to "hidden" after
        ASCII lower-casing, framesetOK is restored to the value it had
        before the element was inserted.

        :arg token: the ``input`` start tag token
        """
        previousFramesetOK = self.parser.framesetOK
        self.startTagVoidFormatting(token)
        attributes = token["data"]
        if "type" not in attributes:
            return
        if attributes["type"].translate(asciiUpper2Lower) == "hidden":
            # input type=hidden doesn't change framesetOK
            self.parser.framesetOK = previousFramesetOK

    def startTagParamSource(self, token):
        """Insert a void element, pop it and acknowledge self-closing.

        :arg token: the start tag token
        """
        tree = self.tree
        tree.insertElement(token)
        tree.openElements.pop()
        token["selfClosingAcknowledged"] = True

    def startTagHr(self, token):
        """Insert an ``hr`` element and pop it straight off the stack.

        An open ``p`` in button scope is closed first. The token's
        self-closing flag is acknowledged and framesetOK is cleared.

        :arg token: the ``hr`` start tag token
        """
        tree = self.tree
        if tree.elementInScope("p", variant="button"):
            self.endTagP(impliedTagToken("p"))
        tree.insertElement(token)
        tree.openElements.pop()
        token["selfClosingAcknowledged"] = True
        self.parser.framesetOK = False

    def startTagImage(self, token):
        """Treat an ``image`` start tag as ``img``.

        This is a parse error. An implied ``img`` start tag carrying the
        same attributes and self-closing flag is processed instead.

        :arg token: the ``image`` start tag token
        """
        # No really...
        self.parser.parseError("unexpected-start-tag-treated-as",
                               {"originalName": "image", "newName": "img"})
        imgToken = impliedTagToken("img", "StartTag",
                                   attributes=token["data"],
                                   selfClosing=token["selfClosing"])
        self.processStartTag(imgToken)

    def startTagIsIndex(self, token):
        """Expand a deprecated ``isindex`` tag into a small form.

        Always a parse error; ignored when a form pointer is already set.
        Otherwise the following implied tokens are processed in order:
        ``form`` (with the ``action`` attribute, if any), ``hr``, ``label``,
        the prompt text, ``input`` (remaining attributes plus
        ``name="isindex"``), the ``label`` end tag, ``hr`` and the ``form``
        end tag.

        :arg token: the ``isindex`` start tag token
        """
        self.parser.parseError("deprecated-tag", {"name": "isindex"})
        if self.tree.formPointer:
            return

        data = token["data"]
        form_attrs = {"action": data["action"]} if "action" in data else {}
        self.processStartTag(impliedTagToken("form", "StartTag",
                                             attributes=form_attrs))
        self.processStartTag(impliedTagToken("hr", "StartTag"))
        self.processStartTag(impliedTagToken("label", "StartTag"))

        # XXX Localization ...
        prompt = (data["prompt"] if "prompt" in data
                  else "This is a searchable index. Enter search keywords: ")
        self.processCharacters(
            {"type": tokenTypes["Characters"], "data": prompt})

        # The input gets every attribute except the two consumed above.
        attributes = data.copy()
        for consumed in ("action", "prompt"):
            if consumed in attributes:
                del attributes[consumed]
        attributes["name"] = "isindex"
        self.processStartTag(impliedTagToken("input", "StartTag",
                                             attributes=attributes,
                                             selfClosing=token["selfClosing"]))

        self.processEndTag(impliedTagToken("label"))
        self.processStartTag(impliedTagToken("hr", "StartTag"))
        self.processEndTag(impliedTagToken("form"))

    def startTagTextarea(self, token):
        """Insert a ``textarea`` element and switch the tokenizer state.

        The tokenizer is put into its ``rcdataState``, the newline-dropping
        space handler is installed and framesetOK is cleared.

        :arg token: the ``textarea`` start tag token
        """
        self.tree.insertElement(token)
        tokenizer = self.parser.tokenizer
        tokenizer.state = tokenizer.rcdataState
        self.processSpaceCharacters = self.processSpaceCharactersDropNewline
        self.parser.framesetOK = False

    def startTagIFrame(self, token):
        """Clear framesetOK, then handle the ``iframe`` as a RAWTEXT element.

        :arg token: the ``iframe`` start tag token
        """
        parser = self.parser
        parser.framesetOK = False
        self.startTagRawtext(token)

    def startTagNoscript(self, token):
        """Handle ``noscript`` according to the parser's scripting flag.

        With scripting on, the element is handled as RAWTEXT; otherwise it
        is handled like any other start tag.

        :arg token: the ``noscript`` start tag token
        """
        handler = self.startTagRawtext if self.parser.scripting else self.startTagOther
        handler(token)

    def startTagRawtext(self, token):
        """Start an element whose content is parsed as RAWTEXT.

        Used for iframe, noembed, noframes and noscript (if scripting is
        enabled).

        :arg token: the start tag token
        """
        contentType = "RAWTEXT"
        self.parser.parseRCDataRawtext(token, contentType)

    def startTagOpt(self, token):
        """Insert an ``option``/``optgroup`` element.

        If the current node is an ``option``, it is closed first with an
        implied end tag sent to the parser's current phase.

        :arg token: the start tag token
        """
        currentNode = self.tree.openElements[-1]
        if currentNode.name == "option":
            self.parser.phase.processEndTag(impliedTagToken("option"))
        self.tree.reconstructActiveFormattingElements()
        self.parser.tree.insertElement(token)

    def startTagSelect(self, token):
        """Insert a ``select`` element and pick the follow-up phase.

        framesetOK is cleared. The next phase is "inSelectInTable" when the
        parser's current phase is one of the table-related phases listed
        below, and "inSelect" otherwise.

        :arg token: the ``select`` start tag token
        """
        self.tree.reconstructActiveFormattingElements()
        self.tree.insertElement(token)
        parser = self.parser
        parser.framesetOK = False
        phases = parser.phases
        tablePhases = tuple(phases[name] for name in (
            "inTable", "inCaption", "inColumnGroup",
            "inTableBody", "inRow", "inCell"))
        nextPhase = "inSelectInTable" if parser.phase in tablePhases else "inSelect"
        parser.phase = phases[nextPhase]

    def startTagRpRt(self, token):
        """Insert an ``rp``/``rt`` element.

        When a ``ruby`` element is in scope, implied end tags are generated
        first. If the current node is then still not ``ruby``, a parse
        error naming the offending tag is reported. The element is inserted
        in every case.

        :arg token: the ``rp`` or ``rt`` start tag token
        """
        tree = self.tree
        if tree.elementInScope("ruby"):
            tree.generateImpliedEndTags()
            if tree.openElements[-1].name != "ruby":
                # Report a named error instead of the parser's undefined
                # placeholder so the message identifies the tag.
                self.parser.parseError("unexpected-start-tag",
                                       {"name": token["name"]})
        tree.insertElement(token)

    def startTagMath(self, token):
        """Insert a ``math`` element in the MathML namespace.

        The parser adjusts the token's MathML and foreign attributes before
        insertion. A self-closing token is popped again right away and its
        self-closing flag acknowledged.

        :arg token: the ``math`` start tag token
        """
        self.tree.reconstructActiveFormattingElements()
        parser = self.parser
        parser.adjustMathMLAttributes(token)
        parser.adjustForeignAttributes(token)
        token["namespace"] = namespaces["mathml"]
        self.tree.insertElement(token)
        # Need to get the parse error right for the case where the token
        # has a namespace not equal to the xmlns attribute
        if not token["selfClosing"]:
            return
        self.tree.openElements.pop()
        token["selfClosingAcknowledged"] = True

    def startTagSvg(self, token):
        """Insert an ``svg`` element in the SVG namespace.

        The parser adjusts the token's SVG and foreign attributes before
        insertion. A self-closing token is popped again right away and its
        self-closing flag acknowledged.

        :arg token: the ``svg`` start tag token
        """
        self.tree.reconstructActiveFormattingElements()
        parser = self.parser
        parser.adjustSVGAttributes(token)
        parser.adjustForeignAttributes(token)
        token["namespace"] = namespaces["svg"]
        self.tree.insertElement(token)
        # Need to get the parse error right for the case where the token
        # has a namespace not equal to the xmlns attribute
        if not token["selfClosing"]:
            return
        self.tree.openElements.pop()
        token["selfClosingAcknowledged"] = True

    def startTagMisplaced(self, token):
        """Ignore a start tag that belongs to a different insertion mode.

        Registered in this phase's start tag dispatcher for "caption",
        "col", "colgroup", "frame", "head", "tbody", "td", "tfoot", "th",
        "thead" and "tr". The tag is reported as a parse error and
        otherwise dropped.

        :arg token: the misplaced start tag token
        """
        details = {"name": token["name"]}
        self.parser.parseError("unexpected-start-tag-ignored", details)

    def startTagOther(self, token):
        """Insert any other element after reconstructing formatting.

        :arg token: the start tag token
        """
        tree = self.tree
        tree.reconstructActiveFormattingElements()
        tree.insertElement(token)

    def endTagP(self, token):
        """Close the nearest ``p`` element in button scope.

        With no ``p`` in button scope, an implied ``p`` start tag is
        processed, a parse error is reported and the method calls itself
        with an implied end tag. Otherwise implied end tags (except ``p``)
        are generated, a parse error is reported if the current node is not
        ``p``, and the stack is popped up to and including the ``p``.

        :arg token: the ``p`` end tag token (not inspected)
        """
        tree = self.tree
        if not tree.elementInScope("p", variant="button"):
            self.startTagCloseP(impliedTagToken("p", "StartTag"))
            self.parser.parseError("unexpected-end-tag", {"name": "p"})
            self.endTagP(impliedTagToken("p", "EndTag"))
            return
        tree.generateImpliedEndTags("p")
        if tree.openElements[-1].name != "p":
            self.parser.parseError("unexpected-end-tag", {"name": "p"})
        while tree.openElements.pop().name != "p":
            pass

    def endTagBody(self, token):
        """Handle a ``body`` end tag.

        With no ``body`` in scope the tag is ignored after a parse error.
        Otherwise, when the current node is not ``body``, the open elements
        from the third entry onwards are scanned and one parse error is
        reported for the first element whose end tag may not be omitted.
        The phase then becomes "afterBody"; nothing is popped here.

        :arg token: the ``body`` end tag token (not inspected)
        """
        tree = self.tree
        if not tree.elementInScope("body"):
            # Use the named error code rather than the parser's undefined
            # placeholder so the message identifies the ignored tag.
            self.parser.parseError("unexpected-end-tag", {"name": "body"})
            return
        if tree.openElements[-1].name != "body":
            # Built once instead of once per open element.
            mayStayOpen = frozenset(("dd", "dt", "li", "optgroup",
                                     "option", "p", "rp", "rt",
                                     "tbody", "td", "tfoot",
                                     "th", "thead", "tr", "body",
                                     "html"))
            for node in tree.openElements[2:]:
                if node.name not in mayStayOpen:
                    # Not sure this is the correct name for the parse error
                    self.parser.parseError(
                        "expected-one-end-tag-but-got-another",
                        {"gotName": "body", "expectedName": node.name})
                    break
        self.parser.phase = self.parser.phases["afterBody"]

    def endTagHtml(self, token):
        """Handle an ``html`` end tag by first closing the body.

        :arg token: the ``html`` end tag token

        :returns: the token when a ``body`` was in scope and an implied
            ``body`` end tag was handled, otherwise ``None``
        """
        # We repeat the test for the body end tag token being ignored here
        if not self.tree.elementInScope("body"):
            return None
        self.endTagBody(impliedTagToken("body"))
        return token

    def endTagBlock(self, token):
        """Close a block-level element.

        For ``pre`` the ordinary space-character handler is restored
        first. When the element is in scope, implied end tags are
        generated and the stack is popped up to and including it. A parse
        error is reported whenever the current node's name differs from the
        token's name, whether or not the element is in scope.

        :arg token: the end tag token
        """
        name = token["name"]
        # Put us back in the right whitespace handling mode
        if name == "pre":
            self.processSpaceCharacters = self.processSpaceCharactersNonPre
        tree = self.tree
        inScope = tree.elementInScope(name)
        if inScope:
            tree.generateImpliedEndTags()
        if tree.openElements[-1].name != name:
            self.parser.parseError("end-tag-too-early", {"name": name})
        if not inScope:
            return
        while tree.openElements.pop().name != name:
            pass

    def endTagForm(self, token):
        """Close the element the form pointer refers to.

        The form pointer is cleared in every case. If there was none, or
        the element is not in scope, only a parse error is reported.
        Otherwise implied end tags are generated, a parse error is reported
        if the form is not the current node, and the form is removed from
        the stack of open elements, wherever it sits.

        :arg token: the ``form`` end tag token (not inspected)
        """
        tree = self.tree
        form = tree.formPointer
        tree.formPointer = None
        if form is None or not tree.elementInScope(form):
            self.parser.parseError("unexpected-end-tag", {"name": "form"})
            return
        tree.generateImpliedEndTags()
        if tree.openElements[-1] != form:
            self.parser.parseError("end-tag-too-early-ignored", {"name": "form"})
        tree.openElements.remove(form)

    def endTagListItem(self, token):
        """Close an ``li``, ``dd`` or ``dt`` element.

        ``li`` is looked up with the "list" scope variant, the others with
        the default scope. If the element is not in scope, only a parse
        error is reported. Otherwise implied end tags (except the element's
        own) are generated, a parse error is reported if the current node
        differs, and the stack is popped up to and including the element.

        :arg token: the end tag token
        """
        name = token["name"]
        variant = "list" if name == "li" else None
        tree = self.tree
        if not tree.elementInScope(name, variant=variant):
            self.parser.parseError("unexpected-end-tag", {"name": name})
            return
        tree.generateImpliedEndTags(exclude=name)
        if tree.openElements[-1].name != name:
            self.parser.parseError("end-tag-too-early", {"name": name})
        while tree.openElements.pop().name != name:
            pass

    def endTagHeading(self, token):
        """Close the innermost open heading element.

        If any heading element is in scope, implied end tags are generated.
        A parse error is reported when the current node's name differs from
        the token's name. If a heading is (still) in scope, the stack is
        popped up to and including the first heading element found, which
        need not have the same level as the end tag.

        :arg token: the heading end tag token
        """
        tree = self.tree

        def headingInScope():
            # Stops at the first heading name that is in scope.
            return any(tree.elementInScope(name) for name in headingElements)

        if headingInScope():
            tree.generateImpliedEndTags()
        if tree.openElements[-1].name != token["name"]:
            self.parser.parseError("end-tag-too-early", {"name": token["name"]})

        if headingInScope():
            while tree.openElements.pop().name not in headingElements:
                pass

    def endTagFormatting(self, token):
        """The much-feared adoption agency algorithm

        Handles the end tag of a formatting element that may be misnested
        with block-level ("special") elements. The outer loop runs at most
        eight times and the inner loop at most three times per outer
        iteration.

        :arg token: the end tag token of the formatting element

        :returns: ``None``; the work is done by mutating the tree, the
            stack of open elements and the list of active formatting
            elements
        """
        # http://svn.whatwg.org/webapps/complete.html#adoptionAgency revision 7867
        # XXX Better parseError messages appreciated.

        # Step 1
        outerLoopCounter = 0

        # Step 2
        while outerLoopCounter < 8:

            # Step 3
            outerLoopCounter += 1

            # Step 4:

            # Let the formatting element be the last element in
            # the list of active formatting elements that:
            # - is between the end of the list and the last scope
            # marker in the list, if any, or the start of the list
            # otherwise, and
            # - has the same tag name as the token.
            formattingElement = self.tree.elementInActiveFormattingElements(
                token["name"])
            if (not formattingElement or
                (formattingElement in self.tree.openElements and
                 not self.tree.elementInScope(formattingElement.name))):
                # If there is no such node, then abort these steps
                # and instead act as described in the "any other
                # end tag" entry below.
                # Note that an element which is open but not in scope is
                # also routed to endTagOther by the condition above.
                self.endTagOther(token)
                return

            # Otherwise, if there is such a node, but that node is
            # not in the stack of open elements, then this is a
            # parse error; remove the element from the list, and
            # abort these steps.
            elif formattingElement not in self.tree.openElements:
                self.parser.parseError("adoption-agency-1.2", {"name": token["name"]})
                self.tree.activeFormattingElements.remove(formattingElement)
                return

            # Otherwise, if there is such a node, and that node is
            # also in the stack of open elements, but the element
            # is not in scope, then this is a parse error; ignore
            # the token, and abort these steps.
            # NOTE(review): this branch looks unreachable, because the
            # "open but not in scope" case is already caught by the first
            # condition of this chain -- confirm before relying on it.
            elif not self.tree.elementInScope(formattingElement.name):
                self.parser.parseError("adoption-agency-4.4", {"name": token["name"]})
                return

            # Otherwise, there is a formatting element and that
            # element is in the stack and is in scope. If the
            # element is not the current node, this is a parse
            # error. In any case, proceed with the algorithm as
            # written in the following steps.
            else:
                if formattingElement != self.tree.openElements[-1]:
                    self.parser.parseError("adoption-agency-1.3", {"name": token["name"]})

            # Step 5:

            # Let the furthest block be the topmost node in the
            # stack of open elements that is lower in the stack
            # than the formatting element, and is an element in
            # the special category. There might not be one.
            afeIndex = self.tree.openElements.index(formattingElement)
            furthestBlock = None
            # The slice starts at the formatting element itself.
            for element in self.tree.openElements[afeIndex:]:
                if element.nameTuple in specialElements:
                    furthestBlock = element
                    break

            # Step 6:

            # If there is no furthest block, then the UA must
            # first pop all the nodes from the bottom of the stack
            # of open elements, from the current node up to and
            # including the formatting element, then remove the
            # formatting element from the list of active
            # formatting elements, and finally abort these steps.
            if furthestBlock is None:
                element = self.tree.openElements.pop()
                while element != formattingElement:
                    element = self.tree.openElements.pop()
                self.tree.activeFormattingElements.remove(element)
                return

            # Step 7
            # The common ancestor is the open element just before the
            # formatting element on the stack.
            commonAncestor = self.tree.openElements[afeIndex - 1]

            # Step 8:
            # The bookmark is supposed to help us identify where to reinsert
            # nodes in step 15. We have to ensure that we reinsert nodes after
            # the node before the active formatting element. Note the bookmark
            # can move in step 9.7
            bookmark = self.tree.activeFormattingElements.index(formattingElement)

            # Step 9
            lastNode = node = furthestBlock
            innerLoopCounter = 0

            index = self.tree.openElements.index(node)
            while innerLoopCounter < 3:
                innerLoopCounter += 1
                # Node is element before node in open elements
                index -= 1
                node = self.tree.openElements[index]
                if node not in self.tree.activeFormattingElements:
                    # Not an active formatting element: drop it from the
                    # stack. The entries after it shift down by one, so the
                    # next "index -= 1" lands on the element that preceded
                    # the removed node.
                    self.tree.openElements.remove(node)
                    continue
                # Step 9.6
                if node == formattingElement:
                    break
                # Step 9.7
                if lastNode == furthestBlock:
                    bookmark = self.tree.activeFormattingElements.index(node) + 1
                # Step 9.8
                clone = node.cloneNode()
                # Replace node with clone
                self.tree.activeFormattingElements[
                    self.tree.activeFormattingElements.index(node)] = clone
                self.tree.openElements[
                    self.tree.openElements.index(node)] = clone
                node = clone
                # Step 9.9
                # Remove lastNode from its parents, if any
                if lastNode.parent:
                    lastNode.parent.removeChild(lastNode)
                node.appendChild(lastNode)
                # Step 9.10
                lastNode = node

            # Step 10
            # Foster parent lastNode if commonAncestor is a
            # table, tbody, tfoot, thead, or tr we need to foster
            # parent the lastNode
            if lastNode.parent:
                lastNode.parent.removeChild(lastNode)

            if commonAncestor.name in frozenset(("table", "tbody", "tfoot", "thead", "tr")):
                parent, insertBefore = self.tree.getTableMisnestedNodePosition()
                parent.insertBefore(lastNode, insertBefore)
            else:
                commonAncestor.appendChild(lastNode)

            # Step 11
            clone = formattingElement.cloneNode()

            # Step 12
            # Hand the furthest block's children over to the clone.
            furthestBlock.reparentChildren(clone)

            # Step 13
            furthestBlock.appendChild(clone)

            # Step 14
            # The clone takes the formatting element's place in the active
            # formatting list, at the bookmarked position.
            self.tree.activeFormattingElements.remove(formattingElement)
            self.tree.activeFormattingElements.insert(bookmark, clone)

            # Step 15
            # On the stack of open elements the clone goes immediately
            # after the furthest block.
            self.tree.openElements.remove(formattingElement)
            self.tree.openElements.insert(
                self.tree.openElements.index(furthestBlock) + 1, clone)

    def endTagAppletMarqueeObject(self, token):
        """Close an ``applet``, ``marquee`` or ``object`` element.

        When the element is in scope, implied end tags are generated. A
        parse error is reported if the current node's name differs from the
        token's name. When the element is (still) in scope, the stack is
        popped up to and including it and the active formatting elements
        are cleared.

        :arg token: the end tag token
        """
        name = token["name"]
        tree = self.tree
        if tree.elementInScope(name):
            tree.generateImpliedEndTags()
        if tree.openElements[-1].name != name:
            self.parser.parseError("end-tag-too-early", {"name": name})

        # The scope test is deliberately repeated, as in the first step.
        if not tree.elementInScope(name):
            return
        while tree.openElements.pop().name != name:
            pass
        tree.clearActiveFormattingElements()

    def endTagBr(self, token):
        """Treat a ``</br>`` end tag as a ``br`` element.

        This is a parse error. A ``br`` element is inserted from an implied
        start tag and popped off the stack of open elements right away.

        :arg token: the ``br`` end tag token (not inspected)
        """
        details = {"originalName": "br", "newName": "br element"}
        self.parser.parseError("unexpected-end-tag-treated-as", details)
        tree = self.tree
        tree.reconstructActiveFormattingElements()
        tree.insertElement(impliedTagToken("br", "StartTag"))
        tree.openElements.pop()

    def endTagOther(self, token):
        """Handle an end tag that has no dedicated handler.

        The stack of open elements is walked from the top. On the first
        node with the token's name, implied end tags (except that name)
        are generated, a parse error is reported if the current node
        differs, and the stack is popped up to and including that node. If
        a special element is met first, the end tag is reported as a parse
        error and ignored.

        :arg token: the end tag token
        """
        name = token["name"]
        stack = self.tree.openElements
        # Both branches that mutate or report leave the loop immediately,
        # so the stack is never iterated after being changed.
        for node in reversed(stack):
            if node.name == name:
                self.tree.generateImpliedEndTags(exclude=name)
                if self.tree.openElements[-1].name != name:
                    self.parser.parseError("unexpected-end-tag", {"name": name})
                while self.tree.openElements.pop() != node:
                    pass
                break
            if node.nameTuple in specialElements:
                self.parser.parseError("unexpected-end-tag", {"name": name})
                break

    # Start tag name -> handler for this phase. Each entry pairs one name,
    # or a tuple of names, with the method defined above. startTagOther is
    # registered as the default for names that are not listed.
    startTagHandler = _utils.MethodDispatcher([
        ("html", Phase.startTagHtml),
        (("base", "basefont", "bgsound", "command", "link", "meta",
          "script", "style", "title"),
         startTagProcessInHead),
        ("body", startTagBody),
        ("frameset", startTagFrameset),
        (("address", "article", "aside", "blockquote", "center", "details",
          "dir", "div", "dl", "fieldset", "figcaption", "figure",
          "footer", "header", "hgroup", "main", "menu", "nav", "ol", "p",
          "section", "summary", "ul"),
         startTagCloseP),
        (headingElements, startTagHeading),
        (("pre", "listing"), startTagPreListing),
        ("form", startTagForm),
        (("li", "dd", "dt"), startTagListItem),
        ("plaintext", startTagPlaintext),
        ("a", startTagA),
        (("b", "big", "code", "em", "font", "i", "s", "small", "strike",
          "strong", "tt", "u"), startTagFormatting),
        ("nobr", startTagNobr),
        ("button", startTagButton),
        (("applet", "marquee", "object"), startTagAppletMarqueeObject),
        ("xmp", startTagXmp),
        ("table", startTagTable),
        (("area", "br", "embed", "img", "keygen", "wbr"),
         startTagVoidFormatting),
        (("param", "source", "track"), startTagParamSource),
        ("input", startTagInput),
        ("hr", startTagHr),
        ("image", startTagImage),
        ("isindex", startTagIsIndex),
        ("textarea", startTagTextarea),
        ("iframe", startTagIFrame),
        ("noscript", startTagNoscript),
        (("noembed", "noframes"), startTagRawtext),
        ("select", startTagSelect),
        (("rp", "rt"), startTagRpRt),
        (("option", "optgroup"), startTagOpt),
        # ("math") and ("svg") are plain strings, not one-element tuples;
        # the extra parentheses have no effect.
        (("math"), startTagMath),
        (("svg"), startTagSvg),
        (("caption", "col", "colgroup", "frame", "head",
          "tbody", "td", "tfoot", "th", "thead",
          "tr"), startTagMisplaced)
    ])
    startTagHandler.default = startTagOther

    # End tag name -> handler for this phase. endTagOther is registered as
    # the default for names that are not listed.
    endTagHandler = _utils.MethodDispatcher([
        ("body", endTagBody),
        ("html", endTagHtml),
        (("address", "article", "aside", "blockquote", "button", "center",
          "details", "dialog", "dir", "div", "dl", "fieldset", "figcaption", "figure",
          "footer", "header", "hgroup", "listing", "main", "menu", "nav", "ol", "pre",
          "section", "summary", "ul"), endTagBlock),
        ("form", endTagForm),
        ("p", endTagP),
        (("dd", "dt", "li"), endTagListItem),
        (headingElements, endTagHeading),
        (("a", "b", "big", "code", "em", "font", "i", "nobr", "s", "small",
          "strike", "strong", "tt", "u"), endTagFormatting),
        (("applet", "marquee", "object"), endTagAppletMarqueeObject),
        ("br", endTagBr),
    ])
    endTagHandler.default = endTagOther


class TextPhase(Phase):
    """Phase active while the text content of an element is being read.

    The assertion message in ``startTagOther`` identifies this as the
    RCDATA/RAWTEXT mode.  Every way out of the phase pops the current node
    and restores ``parser.originalPhase``.
    """
    __slots__ = ()

    def processCharacters(self, token):
        """Insert the character data carried by *token* into the tree."""
        text = token["data"]
        self.tree.insertText(text)

    def processEOF(self):
        """Handle end of input while the element is still open.

        Reports a parse error naming the unclosed element, pops it, restores
        the original phase and returns ``True``.
        """
        stack = self.tree.openElements
        self.parser.parseError("expected-named-closing-tag-but-got-eof",
                               {"name": stack[-1].name})
        stack.pop()
        self.parser.phase = self.parser.originalPhase
        return True

    def startTagOther(self, token):
        """Start tags are never expected in this phase."""
        assert False, "Tried to process start tag %s in RCDATA/RAWTEXT mode" % token['name']

    def endTagScript(self, token):
        """Close the current ``script`` element and restore the original phase."""
        closed = self.tree.openElements.pop()
        assert closed.name == "script"
        self.parser.phase = self.parser.originalPhase
        # Everything else the spec describes for </script> only matters
        # when document.write works

    def endTagOther(self, token):
        """Close the current element and restore the original phase."""
        self.tree.openElements.pop()
        self.parser.phase = self.parser.originalPhase

    # No start tag has a dedicated handler; all go to startTagOther.
    startTagHandler = _utils.MethodDispatcher([])
    startTagHandler.default = startTagOther
    endTagHandler = _utils.MethodDispatcher([
        ("script", endTagScript)])
    endTagHandler.default = endTagOther


class InTablePhase(Phase):
    """Tree-construction phase for content directly inside a ``table``.

    http://www.whatwg.org/specs/web-apps/current-work/#in-table

    Handlers that return a token hand it back to the caller for
    reprocessing; handlers that return ``None`` have consumed it.
    """
    __slots__ = tuple()

    # helper methods
    def clearStackToTableContext(self):
        """Pop open elements until the current node is ``table`` or ``html``.

        No parse error is reported for the elements popped here.
        """
        # "clear the stack back to a table context"
        while self.tree.openElements[-1].name not in ("table", "html"):
            self.tree.openElements.pop()
        # When the current node is <html> it's an innerHTML case

    def _enterTableText(self):
        """Make "inTableText" the current phase and return it.

        The phase that was current on entry is stored on the table-text
        phase as ``originalPhase`` so it can be restored later.
        """
        originalPhase = self.parser.phase
        self.parser.phase = self.parser.phases["inTableText"]
        self.parser.phase.originalPhase = originalPhase
        return self.parser.phase

    def _processFromTable(self, handler, token):
        """Run *handler(token)* with ``tree.insertFromTable`` set.

        The flag is cleared in a ``finally`` clause so that it cannot be
        left set when the handler raises (e.g. a parse error raised in
        strict mode).  As before, the handler's return value is discarded.
        """
        self.tree.insertFromTable = True
        try:
            handler(token)
        finally:
            self.tree.insertFromTable = False

    # processing methods
    def processEOF(self):
        """Report ``eof-in-table`` unless the current node is ``html``."""
        if self.tree.openElements[-1].name != "html":
            self.parser.parseError("eof-in-table")
        else:
            assert self.parser.innerHTML
        # Stop parsing

    def processSpaceCharacters(self, token):
        """Hand whitespace over to the "inTableText" phase."""
        self._enterTableText().processSpaceCharacters(token)

    def processCharacters(self, token):
        """Hand character data over to the "inTableText" phase."""
        self._enterTableText().processCharacters(token)

    def insertText(self, token):
        """Insert text through the "inBody" phase with the table flag set."""
        # If we get here there must be at least one non-whitespace character
        # Do the table magic!
        self._processFromTable(
            self.parser.phases["inBody"].processCharacters, token)

    def startTagCaption(self, token):
        """Open a ``caption`` and switch to the "inCaption" phase."""
        self.clearStackToTableContext()
        self.tree.activeFormattingElements.append(Marker)
        self.tree.insertElement(token)
        self.parser.phase = self.parser.phases["inCaption"]

    def startTagColgroup(self, token):
        """Open a ``colgroup`` and switch to the "inColumnGroup" phase."""
        self.clearStackToTableContext()
        self.tree.insertElement(token)
        self.parser.phase = self.parser.phases["inColumnGroup"]

    def startTagCol(self, token):
        """Imply a ``colgroup`` start tag, then return *token* for reprocessing."""
        self.startTagColgroup(impliedTagToken("colgroup", "StartTag"))
        return token

    def startTagRowGroup(self, token):
        """Open a row group and switch to the "inTableBody" phase."""
        self.clearStackToTableContext()
        self.tree.insertElement(token)
        self.parser.phase = self.parser.phases["inTableBody"]

    def startTagImplyTbody(self, token):
        """Imply a ``tbody`` start tag, then return *token* for reprocessing."""
        self.startTagRowGroup(impliedTagToken("tbody", "StartTag"))
        return token

    def startTagTable(self, token):
        """Treat a nested ``<table>`` as an implied ``</table>``.

        The token is returned for reprocessing unless parsing a fragment
        (``parser.innerHTML`` set).
        """
        self.parser.parseError("unexpected-start-tag-implies-end-tag",
                               {"startName": "table", "endName": "table"})
        self.parser.phase.processEndTag(impliedTagToken("table"))
        if not self.parser.innerHTML:
            return token

    def startTagStyleScript(self, token):
        """Delegate ``style``/``script`` start tags to the "inHead" phase."""
        return self.parser.phases["inHead"].processStartTag(token)

    def startTagInput(self, token):
        """Insert ``<input type=hidden>`` in place; anything else is "other".

        The ``type`` attribute value is compared after ASCII lower-casing.
        """
        if ("type" in token["data"] and
                token["data"]["type"].translate(asciiUpper2Lower) == "hidden"):
            self.parser.parseError("unexpected-hidden-input-in-table")
            self.tree.insertElement(token)
            # XXX associate with form
            self.tree.openElements.pop()
        else:
            self.startTagOther(token)

    def startTagForm(self, token):
        """Insert a ``form`` and record it as the form pointer if none is set.

        The element is popped again immediately, so it never stays open.
        """
        self.parser.parseError("unexpected-form-in-table")
        if self.tree.formPointer is None:
            self.tree.insertElement(token)
            self.tree.formPointer = self.tree.openElements[-1]
            self.tree.openElements.pop()

    def startTagOther(self, token):
        """Process any other start tag via "inBody" with the table flag set."""
        self.parser.parseError("unexpected-start-tag-implies-table-voodoo", {"name": token["name"]})
        # Do the table magic!
        self._processFromTable(
            self.parser.phases["inBody"].processStartTag, token)

    def endTagTable(self, token):
        """Close the ``table`` in table scope and reset the insertion mode."""
        if self.tree.elementInScope("table", variant="table"):
            self.tree.generateImpliedEndTags()
            if self.tree.openElements[-1].name != "table":
                self.parser.parseError("end-tag-too-early-named",
                                       {"gotName": "table",
                                        "expectedName": self.tree.openElements[-1].name})
            while self.tree.openElements[-1].name != "table":
                self.tree.openElements.pop()
            self.tree.openElements.pop()
            self.parser.resetInsertionMode()
        else:
            # innerHTML case
            assert self.parser.innerHTML
            self.parser.parseError()

    def endTagIgnore(self, token):
        """Report the end tag as unexpected and otherwise ignore it."""
        self.parser.parseError("unexpected-end-tag", {"name": token["name"]})

    def endTagOther(self, token):
        """Process any other end tag via "inBody" with the table flag set."""
        self.parser.parseError("unexpected-end-tag-implies-table-voodoo", {"name": token["name"]})
        # Do the table magic!
        self._processFromTable(
            self.parser.phases["inBody"].processEndTag, token)

    startTagHandler = _utils.MethodDispatcher([
        ("html", Phase.startTagHtml),
        ("caption", startTagCaption),
        ("colgroup", startTagColgroup),
        ("col", startTagCol),
        (("tbody", "tfoot", "thead"), startTagRowGroup),
        (("td", "th", "tr"), startTagImplyTbody),
        ("table", startTagTable),
        (("style", "script"), startTagStyleScript),
        ("input", startTagInput),
        ("form", startTagForm)
    ])
    startTagHandler.default = startTagOther

    endTagHandler = _utils.MethodDispatcher([
        ("table", endTagTable),
        (("body", "caption", "col", "colgroup", "html", "tbody", "td",
          "tfoot", "th", "thead", "tr"), endTagIgnore)
    ])
    endTagHandler.default = endTagOther


class InTableTextPhase(Phase):
    """Phase that buffers character tokens seen while inside a table.

    ``characterTokens`` collects the tokens; ``originalPhase`` is the phase
    to return to once a non-character token arrives and the buffer has been
    flushed.
    """
    __slots__ = ("originalPhase", "characterTokens")

    def __init__(self, *args, **kwargs):
        super(InTableTextPhase, self).__init__(*args, **kwargs)
        self.characterTokens = []
        self.originalPhase = None

    def flushCharacters(self):
        """Emit the buffered text and empty the buffer.

        Text containing any non-space character is passed, as a single
        Characters token, to the "inTable" phase's ``insertText``; text made
        only of space characters is inserted into the tree directly; empty
        text is dropped.
        """
        text = "".join([pending["data"] for pending in self.characterTokens])
        onlySpaces = all(char in spaceCharacters for char in text)
        if not onlySpaces:
            self.parser.phases["inTable"].insertText(
                {"type": tokenTypes["Characters"], "data": text})
        elif text:
            self.tree.insertText(text)
        self.characterTokens = []

    def _flushAndRestorePhase(self):
        """Flush the buffer, then make ``originalPhase`` current again."""
        self.flushCharacters()
        self.parser.phase = self.originalPhase

    def processComment(self, token):
        """Leave this phase and return the comment for reprocessing."""
        self._flushAndRestorePhase()
        return token

    def processEOF(self):
        """Leave this phase and return ``True``."""
        self._flushAndRestorePhase()
        return True

    def processCharacters(self, token):
        """Buffer *token*, unless it is a lone NUL character."""
        if token["data"] != "\u0000":
            self.characterTokens.append(token)

    def processSpaceCharacters(self, token):
        """Buffer a whitespace token."""
        # Believed to be unreachable; the buffering is kept rather than
        # asserting.
        self.characterTokens.append(token)

    def processStartTag(self, token):
        """Leave this phase and return the start tag for reprocessing."""
        self._flushAndRestorePhase()
        return token

    def processEndTag(self, token):
        """Leave this phase and return the end tag for reprocessing."""
        self._flushAndRestorePhase()
        return token


class InCaptionPhase(Phase):
    """Tree-construction phase for content inside a table ``caption``.

    http://www.whatwg.org/specs/web-apps/current-work/#in-caption

    Anything that is not table structure is delegated to the "inBody" phase.
    """
    __slots__ = ()

    def ignoreEndTagCaption(self):
        """Return True when no ``caption`` element is in table scope."""
        captionInScope = self.tree.elementInScope("caption", variant="table")
        return not captionInScope

    def processEOF(self):
        """Delegate EOF to the "inBody" phase (its result is not returned)."""
        bodyPhase = self.parser.phases["inBody"]
        bodyPhase.processEOF()

    def processCharacters(self, token):
        """Delegate character data to the "inBody" phase."""
        bodyPhase = self.parser.phases["inBody"]
        return bodyPhase.processCharacters(token)

    def startTagTableElement(self, token):
        """Imply ``</caption>`` before a table-structure start tag.

        The token is returned for reprocessing unless the implied end tag
        was ignored.
        """
        self.parser.parseError()
        # XXX The "is it ignored" test has to be evaluated up front, before
        # the implied end tag changes the stack
        captionIgnored = self.ignoreEndTagCaption()
        self.parser.phase.processEndTag(impliedTagToken("caption"))
        if captionIgnored:
            return None
        return token

    def startTagOther(self, token):
        """Delegate every other start tag to the "inBody" phase."""
        bodyPhase = self.parser.phases["inBody"]
        return bodyPhase.processStartTag(token)

    def endTagCaption(self, token):
        """Close the ``caption`` and go back to the "inTable" phase."""
        if self.ignoreEndTagCaption():
            # innerHTML case
            assert self.parser.innerHTML
            self.parser.parseError()
            return
        # AT this code is quite similar to endTagTable in "InTable"
        self.tree.generateImpliedEndTags()
        currentName = self.tree.openElements[-1].name
        if currentName != "caption":
            self.parser.parseError("expected-one-end-tag-but-got-another",
                                   {"gotName": "caption",
                                    "expectedName": currentName})
        # Pop up to and including the caption element
        while self.tree.openElements.pop().name != "caption":
            pass
        self.tree.clearActiveFormattingElements()
        self.parser.phase = self.parser.phases["inTable"]

    def endTagTable(self, token):
        """Imply ``</caption>`` before ``</table>``.

        The token is returned for reprocessing unless the implied end tag
        was ignored.
        """
        self.parser.parseError()
        captionIgnored = self.ignoreEndTagCaption()
        self.parser.phase.processEndTag(impliedTagToken("caption"))
        if captionIgnored:
            return None
        return token

    def endTagIgnore(self, token):
        """Report the end tag as unexpected and otherwise ignore it."""
        tagName = token["name"]
        self.parser.parseError("unexpected-end-tag", {"name": tagName})

    def endTagOther(self, token):
        """Delegate every other end tag to the "inBody" phase."""
        bodyPhase = self.parser.phases["inBody"]
        return bodyPhase.processEndTag(token)

    startTagHandler = _utils.MethodDispatcher([
        ("html", Phase.startTagHtml),
        (("caption", "col", "colgroup", "tbody", "td", "tfoot", "th",
          "thead", "tr"), startTagTableElement)
    ])
    startTagHandler.default = startTagOther

    endTagHandler = _utils.MethodDispatcher([
        ("caption", endTagCaption),
        ("table", endTagTable),
        (("body", "col", "colgroup", "html", "tbody", "td", "tfoot", "th",
          "thead", "tr"), endTagIgnore)
    ])
    endTagHandler.default = endTagOther


class InColumnGroupPhase(Phase):
    """Tree-construction phase for content inside a ``colgroup``.

    http://www.whatwg.org/specs/web-apps/current-work/#in-column

    Apart from ``col`` and ``</colgroup>``, every token implies
    ``</colgroup>`` and is then handed back for reprocessing.
    """
    __slots__ = ()

    def ignoreEndTagColgroup(self):
        """Return True when the current node is the ``html`` element."""
        currentNode = self.tree.openElements[-1]
        return currentNode.name == "html"

    def _closeColgroupThen(self, result):
        """Process an implied ``</colgroup>``.

        Returns *result* when the end tag took effect and ``None`` when it
        was ignored.  The "ignored" test is made before the end tag is
        processed, because processing changes the stack.
        """
        colgroupIgnored = self.ignoreEndTagColgroup()
        self.endTagColgroup(impliedTagToken("colgroup"))
        if colgroupIgnored:
            return None
        return result

    def processEOF(self):
        """Handle end of input.

        With ``html`` as the current node (innerHTML case) nothing is done;
        otherwise the ``colgroup`` is closed and ``True`` is returned.
        """
        if self.tree.openElements[-1].name == "html":
            assert self.parser.innerHTML
            return None
        return self._closeColgroupThen(True)

    def processCharacters(self, token):
        """Close the ``colgroup`` and return *token* for reprocessing."""
        return self._closeColgroupThen(token)

    def startTagCol(self, token):
        """Insert a ``col`` element and pop it again straight away."""
        self.tree.insertElement(token)
        self.tree.openElements.pop()
        token["selfClosingAcknowledged"] = True

    def startTagOther(self, token):
        """Close the ``colgroup`` and return *token* for reprocessing."""
        return self._closeColgroupThen(token)

    def endTagColgroup(self, token):
        """Pop the ``colgroup`` and return to the "inTable" phase."""
        if not self.ignoreEndTagColgroup():
            self.tree.openElements.pop()
            self.parser.phase = self.parser.phases["inTable"]
            return
        # innerHTML case
        assert self.parser.innerHTML
        self.parser.parseError()

    def endTagCol(self, token):
        """``</col>`` is always a parse error."""
        self.parser.parseError("no-end-tag", {"name": "col"})

    def endTagOther(self, token):
        """Close the ``colgroup`` and return *token* for reprocessing."""
        return self._closeColgroupThen(token)

    startTagHandler = _utils.MethodDispatcher([
        ("html", Phase.startTagHtml),
        ("col", startTagCol)
    ])
    startTagHandler.default = startTagOther

    endTagHandler = _utils.MethodDispatcher([
        ("colgroup", endTagColgroup),
        ("col", endTagCol)
    ])
    endTagHandler.default = endTagOther


class InTableBodyPhase(Phase):
    """Tree-construction phase for ``tbody``, ``thead`` and ``tfoot`` content.

    http://www.whatwg.org/specs/web-apps/current-work/#in-table0

    Tokens without a dedicated handler are delegated to the "inTable" phase.
    """
    __slots__ = ()

    # helper methods
    def clearStackToTableBodyContext(self):
        """Pop open elements until a row group or ``html`` is the current node.

        No parse error is reported for the popped elements.
        """
        stopNames = ("tbody", "tfoot", "thead", "html")
        while self.tree.openElements[-1].name not in stopNames:
            self.tree.openElements.pop()
        if self.tree.openElements[-1].name == "html":
            assert self.parser.innerHTML

    def _rowGroupInTableScope(self):
        """Return whether a ``tbody``, ``thead`` or ``tfoot`` is in table scope."""
        return any(self.tree.elementInScope(groupName, variant="table")
                   for groupName in ("tbody", "thead", "tfoot"))

    def _closeRowGroupThenReprocess(self, token):
        """Close the open row group and return *token* for reprocessing.

        When no row group is in table scope (innerHTML case) a parse error
        is reported and ``None`` is returned.
        """
        # XXX AT Any ideas on how to share this with endTagTable?
        if not self._rowGroupInTableScope():
            # innerHTML case
            assert self.parser.innerHTML
            self.parser.parseError()
            return None
        self.clearStackToTableBodyContext()
        currentName = self.tree.openElements[-1].name
        self.endTagTableRowGroup(impliedTagToken(currentName))
        return token

    # the rest
    def processEOF(self):
        """Delegate EOF to the "inTable" phase (its result is not returned)."""
        tablePhase = self.parser.phases["inTable"]
        tablePhase.processEOF()

    def processSpaceCharacters(self, token):
        """Delegate whitespace to the "inTable" phase."""
        tablePhase = self.parser.phases["inTable"]
        return tablePhase.processSpaceCharacters(token)

    def processCharacters(self, token):
        """Delegate character data to the "inTable" phase."""
        tablePhase = self.parser.phases["inTable"]
        return tablePhase.processCharacters(token)

    def startTagTr(self, token):
        """Open a ``tr`` and switch to the "inRow" phase."""
        self.clearStackToTableBodyContext()
        self.tree.insertElement(token)
        self.parser.phase = self.parser.phases["inRow"]

    def startTagTableCell(self, token):
        """Imply a ``tr`` before a stray cell and return the cell token."""
        cellName = token["name"]
        self.parser.parseError("unexpected-cell-in-table-body",
                               {"name": cellName})
        self.startTagTr(impliedTagToken("tr", "StartTag"))
        return token

    def startTagTableOther(self, token):
        """Close the current row group before another table-level start tag."""
        return self._closeRowGroupThenReprocess(token)

    def startTagOther(self, token):
        """Delegate every other start tag to the "inTable" phase."""
        tablePhase = self.parser.phases["inTable"]
        return tablePhase.processStartTag(token)

    def endTagTableRowGroup(self, token):
        """Close the named row group and return to the "inTable" phase."""
        groupName = token["name"]
        if not self.tree.elementInScope(groupName, variant="table"):
            self.parser.parseError("unexpected-end-tag-in-table-body",
                                   {"name": groupName})
            return
        self.clearStackToTableBodyContext()
        self.tree.openElements.pop()
        self.parser.phase = self.parser.phases["inTable"]

    def endTagTable(self, token):
        """Close the current row group before ``</table>`` is reprocessed."""
        return self._closeRowGroupThenReprocess(token)

    def endTagIgnore(self, token):
        """Report the end tag as unexpected and otherwise ignore it."""
        tagName = token["name"]
        self.parser.parseError("unexpected-end-tag-in-table-body",
                               {"name": tagName})

    def endTagOther(self, token):
        """Delegate every other end tag to the "inTable" phase."""
        tablePhase = self.parser.phases["inTable"]
        return tablePhase.processEndTag(token)

    startTagHandler = _utils.MethodDispatcher([
        ("html", Phase.startTagHtml),
        ("tr", startTagTr),
        (("td", "th"), startTagTableCell),
        (("caption", "col", "colgroup", "tbody", "tfoot", "thead"),
         startTagTableOther)
    ])
    startTagHandler.default = startTagOther

    endTagHandler = _utils.MethodDispatcher([
        (("tbody", "tfoot", "thead"), endTagTableRowGroup),
        ("table", endTagTable),
        (("body", "caption", "col", "colgroup", "html", "td", "th",
          "tr"), endTagIgnore)
    ])
    endTagHandler.default = endTagOther


class InRowPhase(Phase):
    """Tree-construction phase for content inside a ``tr``.

    http://www.whatwg.org/specs/web-apps/current-work/#in-row

    Tokens without a dedicated handler are delegated to the "inTable" phase.
    """
    __slots__ = ()

    # helper methods (XXX unify this with other table helper methods)
    def clearStackToTableRowContext(self):
        """Pop open elements until ``tr`` or ``html`` is the current node.

        A parse error is reported for each element that gets popped.
        """
        while True:
            current = self.tree.openElements[-1]
            if current.name in ("tr", "html"):
                break
            self.parser.parseError("unexpected-implied-end-tag-in-table-row",
                                   {"name": current.name})
            self.tree.openElements.pop()

    def ignoreEndTagTr(self):
        """Return True when no ``tr`` element is in table scope."""
        rowInScope = self.tree.elementInScope("tr", variant="table")
        return not rowInScope

    def _closeRowThenReprocess(self, token):
        """Process an implied ``</tr>``.

        Returns *token* for reprocessing unless the end tag was ignored.
        The "ignored" test is made before the end tag changes the stack.
        """
        rowIgnored = self.ignoreEndTagTr()
        self.endTagTr(impliedTagToken("tr"))
        # XXX how are we sure it's always ignored in the innerHTML case?
        if rowIgnored:
            return None
        return token

    # the rest
    def processEOF(self):
        """Delegate EOF to the "inTable" phase (its result is not returned)."""
        tablePhase = self.parser.phases["inTable"]
        tablePhase.processEOF()

    def processSpaceCharacters(self, token):
        """Delegate whitespace to the "inTable" phase."""
        tablePhase = self.parser.phases["inTable"]
        return tablePhase.processSpaceCharacters(token)

    def processCharacters(self, token):
        """Delegate character data to the "inTable" phase."""
        tablePhase = self.parser.phases["inTable"]
        return tablePhase.processCharacters(token)

    def startTagTableCell(self, token):
        """Open a cell, switch to "inCell" and push a formatting marker."""
        self.clearStackToTableRowContext()
        self.tree.insertElement(token)
        self.parser.phase = self.parser.phases["inCell"]
        self.tree.activeFormattingElements.append(Marker)

    def startTagTableOther(self, token):
        """Close the row before another table-structure start tag."""
        return self._closeRowThenReprocess(token)

    def startTagOther(self, token):
        """Delegate every other start tag to the "inTable" phase."""
        tablePhase = self.parser.phases["inTable"]
        return tablePhase.processStartTag(token)

    def endTagTr(self, token):
        """Close the ``tr`` and return to the "inTableBody" phase."""
        if self.ignoreEndTagTr():
            # innerHTML case
            assert self.parser.innerHTML
            self.parser.parseError()
            return
        self.clearStackToTableRowContext()
        self.tree.openElements.pop()
        self.parser.phase = self.parser.phases["inTableBody"]

    def endTagTable(self, token):
        """Close the row, then have ``</table>`` reprocessed.

        The token is only returned if the implied ``</tr>`` was not ignored.
        """
        return self._closeRowThenReprocess(token)

    def endTagTableRowGroup(self, token):
        """Close the row before a row-group end tag that is in table scope."""
        if not self.tree.elementInScope(token["name"], variant="table"):
            self.parser.parseError()
            return None
        self.endTagTr(impliedTagToken("tr"))
        return token

    def endTagIgnore(self, token):
        """Report the end tag as unexpected and otherwise ignore it."""
        tagName = token["name"]
        self.parser.parseError("unexpected-end-tag-in-table-row",
                               {"name": tagName})

    def endTagOther(self, token):
        """Delegate every other end tag to the "inTable" phase."""
        tablePhase = self.parser.phases["inTable"]
        return tablePhase.processEndTag(token)

    startTagHandler = _utils.MethodDispatcher([
        ("html", Phase.startTagHtml),
        (("td", "th"), startTagTableCell),
        (("caption", "col", "colgroup", "tbody", "tfoot", "thead",
          "tr"), startTagTableOther)
    ])
    startTagHandler.default = startTagOther

    endTagHandler = _utils.MethodDispatcher([
        ("tr", endTagTr),
        ("table", endTagTable),
        (("tbody", "tfoot", "thead"), endTagTableRowGroup),
        (("body", "caption", "col", "colgroup", "html", "td", "th"),
         endTagIgnore)
    ])
    endTagHandler.default = endTagOther


class InCellPhase(Phase):
    """Tree-construction phase for content inside a ``td`` or ``th``.

    http://www.whatwg.org/specs/web-apps/current-work/#in-cell

    Anything that is not table structure is delegated to the "inBody" phase.
    """
    __slots__ = ()

    # helper
    def closeCell(self):
        """Close whichever of ``td`` / ``th`` is in table scope (``td`` first)."""
        for cellName in ("td", "th"):
            if self.tree.elementInScope(cellName, variant="table"):
                self.endTagTableCell(impliedTagToken(cellName))
                break

    # the rest
    def processEOF(self):
        """Delegate EOF to the "inBody" phase (its result is not returned)."""
        bodyPhase = self.parser.phases["inBody"]
        bodyPhase.processEOF()

    def processCharacters(self, token):
        """Delegate character data to the "inBody" phase."""
        bodyPhase = self.parser.phases["inBody"]
        return bodyPhase.processCharacters(token)

    def startTagTableOther(self, token):
        """Close the open cell before a table-structure start tag.

        The token is returned for reprocessing when a cell was in table
        scope; otherwise (innerHTML case) a parse error is reported.
        """
        cellInScope = any(self.tree.elementInScope(cellName, variant="table")
                          for cellName in ("td", "th"))
        if not cellInScope:
            # innerHTML case
            assert self.parser.innerHTML
            self.parser.parseError()
            return None
        self.closeCell()
        return token

    def startTagOther(self, token):
        """Delegate every other start tag to the "inBody" phase."""
        bodyPhase = self.parser.phases["inBody"]
        return bodyPhase.processStartTag(token)

    def endTagTableCell(self, token):
        """Close the named cell and return to the "inRow" phase."""
        cellName = token["name"]
        if not self.tree.elementInScope(cellName, variant="table"):
            self.parser.parseError("unexpected-end-tag", {"name": cellName})
            return
        self.tree.generateImpliedEndTags(cellName)
        if self.tree.openElements[-1].name != cellName:
            self.parser.parseError("unexpected-cell-end-tag",
                                   {"name": cellName})
        # Pop up to and including the cell; this is a single pop when the
        # cell is already the current node
        while self.tree.openElements.pop().name != cellName:
            pass
        self.tree.clearActiveFormattingElements()
        self.parser.phase = self.parser.phases["inRow"]

    def endTagIgnore(self, token):
        """Report the end tag as unexpected and otherwise ignore it."""
        tagName = token["name"]
        self.parser.parseError("unexpected-end-tag", {"name": tagName})

    def endTagImply(self, token):
        """Close the cell before a table-structure end tag in table scope."""
        if not self.tree.elementInScope(token["name"], variant="table"):
            # sometimes innerHTML case
            self.parser.parseError()
            return None
        self.closeCell()
        return token

    def endTagOther(self, token):
        """Delegate every other end tag to the "inBody" phase."""
        bodyPhase = self.parser.phases["inBody"]
        return bodyPhase.processEndTag(token)

    startTagHandler = _utils.MethodDispatcher([
        ("html", Phase.startTagHtml),
        (("caption", "col", "colgroup", "tbody", "td", "tfoot", "th",
          "thead", "tr"), startTagTableOther)
    ])
    startTagHandler.default = startTagOther

    endTagHandler = _utils.MethodDispatcher([
        (("td", "th"), endTagTableCell),
        (("body", "caption", "col", "colgroup", "html"), endTagIgnore),
        (("table", "tbody", "tfoot", "thead", "tr"), endTagImply)
    ])
    endTagHandler.default = endTagOther


class InSelectPhase(Phase):
    """Tree-construction phase for content inside a ``select``.

    http://www.whatwg.org/specs/web-apps/current-work/#in-select
    """
    __slots__ = ()

    def processEOF(self):
        """Report ``eof-in-select`` unless the current node is ``html``."""
        if self.tree.openElements[-1].name == "html":
            assert self.parser.innerHTML
        else:
            self.parser.parseError("eof-in-select")

    def processCharacters(self, token):
        """Insert character data, dropping a lone NUL character."""
        if token["data"] != "\u0000":
            self.tree.insertText(token["data"])

    def startTagOption(self, token):
        """Insert an ``option``, first closing one that is the current node."""
        # We need to imply </option> if <option> is the current node.
        stack = self.tree.openElements
        if stack[-1].name == "option":
            stack.pop()
        self.tree.insertElement(token)

    def startTagOptgroup(self, token):
        """Insert an ``optgroup``, implying ``</option>`` then ``</optgroup>``."""
        stack = self.tree.openElements
        for impliedEnd in ("option", "optgroup"):
            if stack[-1].name == impliedEnd:
                stack.pop()
        self.tree.insertElement(token)

    def startTagSelect(self, token):
        """Treat a nested ``<select>`` as ``</select>``."""
        self.parser.parseError("unexpected-select-in-select")
        self.endTagSelect(impliedTagToken("select"))

    def startTagInput(self, token):
        """Close the ``select`` and return *token* for reprocessing.

        When no ``select`` is in select scope (innerHTML case) the token is
        dropped.
        """
        self.parser.parseError("unexpected-input-in-select")
        if not self.tree.elementInScope("select", variant="select"):
            assert self.parser.innerHTML
            return None
        self.endTagSelect(impliedTagToken("select"))
        return token

    def startTagScript(self, token):
        """Delegate ``script`` start tags to the "inHead" phase."""
        headPhase = self.parser.phases["inHead"]
        return headPhase.processStartTag(token)

    def startTagOther(self, token):
        """Report any other start tag as unexpected and ignore it."""
        tagName = token["name"]
        self.parser.parseError("unexpected-start-tag-in-select",
                               {"name": tagName})

    def endTagOption(self, token):
        """Pop the current node if it is an ``option``; otherwise report."""
        if self.tree.openElements[-1].name != "option":
            self.parser.parseError("unexpected-end-tag-in-select",
                                   {"name": "option"})
            return
        self.tree.openElements.pop()

    def endTagOptgroup(self, token):
        """Close the current ``optgroup``, and an ``option`` directly inside it."""
        stack = self.tree.openElements
        # </optgroup> implicitly closes <option>
        if stack[-1].name == "option" and stack[-2].name == "optgroup":
            stack.pop()
        # It also closes </optgroup>, but nothing else
        if stack[-1].name != "optgroup":
            self.parser.parseError("unexpected-end-tag-in-select",
                                   {"name": "optgroup"})
        else:
            stack.pop()

    def endTagSelect(self, token):
        """Pop up to and including ``select`` and reset the insertion mode."""
        if not self.tree.elementInScope("select", variant="select"):
            # innerHTML case
            assert self.parser.innerHTML
            self.parser.parseError()
            return
        while self.tree.openElements.pop().name != "select":
            pass
        self.parser.resetInsertionMode()

    def endTagOther(self, token):
        """Report any other end tag as unexpected and ignore it."""
        tagName = token["name"]
        self.parser.parseError("unexpected-end-tag-in-select",
                               {"name": tagName})

    startTagHandler = _utils.MethodDispatcher([
        ("html", Phase.startTagHtml),
        ("option", startTagOption),
        ("optgroup", startTagOptgroup),
        ("select", startTagSelect),
        (("input", "keygen", "textarea"), startTagInput),
        ("script", startTagScript)
    ])
    startTagHandler.default = startTagOther

    endTagHandler = _utils.MethodDispatcher([
        ("option", endTagOption),
        ("optgroup", endTagOptgroup),
        ("select", endTagSelect)
    ])
    endTagHandler.default = endTagOther


class InSelectInTablePhase(Phase):
    """Tree-construction phase for a ``select`` that sits inside a table.

    Table-structure tags close the ``select``; everything else is handled
    by the "inSelect" phase.
    """
    __slots__ = ()

    def processEOF(self):
        """Delegate EOF to the "inSelect" phase (its result is not returned)."""
        selectPhase = self.parser.phases["inSelect"]
        selectPhase.processEOF()

    def processCharacters(self, token):
        """Delegate character data to the "inSelect" phase."""
        selectPhase = self.parser.phases["inSelect"]
        return selectPhase.processCharacters(token)

    def startTagTable(self, token):
        """Imply ``</select>`` and return *token* for reprocessing."""
        tagName = token["name"]
        self.parser.parseError("unexpected-table-element-start-tag-in-select-in-table", {"name": tagName})
        self.endTagOther(impliedTagToken("select"))
        return token

    def startTagOther(self, token):
        """Delegate every other start tag to the "inSelect" phase."""
        selectPhase = self.parser.phases["inSelect"]
        return selectPhase.processStartTag(token)

    def endTagTable(self, token):
        """Imply ``</select>`` if the named element is in table scope.

        A parse error is always reported.  The token is returned for
        reprocessing only when the element was in table scope.
        """
        tagName = token["name"]
        self.parser.parseError("unexpected-table-element-end-tag-in-select-in-table", {"name": tagName})
        if not self.tree.elementInScope(token["name"], variant="table"):
            return None
        self.endTagOther(impliedTagToken("select"))
        return token

    def endTagOther(self, token):
        """Delegate every other end tag to the "inSelect" phase."""
        selectPhase = self.parser.phases["inSelect"]
        return selectPhase.processEndTag(token)

    startTagHandler = _utils.MethodDispatcher([
        (("caption", "table", "tbody", "tfoot", "thead", "tr", "td", "th"),
         startTagTable)
    ])
    startTagHandler.default = startTagOther

    endTagHandler = _utils.MethodDispatcher([
        (("caption", "table", "tbody", "tfoot", "thead", "tr", "td", "th"),
         endTagTable)
    ])
    endTagHandler.default = endTagOther


class InForeignContentPhase(Phase):
    """Tree-construction phase for tokens inside MathML or SVG content.

    Unlike the other phases in this file, start and end tags are handled by
    overriding ``processStartTag`` / ``processEndTag`` directly rather than
    through dispatch tables.
    """
    __slots__ = tuple()

    # HTML start tags that force the parser out of foreign content
    breakoutElements = frozenset(["b", "big", "blockquote", "body", "br",
                                  "center", "code", "dd", "div", "dl", "dt",
                                  "em", "embed", "h1", "h2", "h3",
                                  "h4", "h5", "h6", "head", "hr", "i", "img",
                                  "li", "listing", "menu", "meta", "nobr",
                                  "ol", "p", "pre", "ruby", "s", "small",
                                  "span", "strong", "strike", "sub", "sup",
                                  "table", "tt", "u", "ul", "var"])

    # Lower-cased SVG tag name -> its mixed-case spelling.  Built once at
    # class creation instead of on every adjustSVGTagNames() call.
    svgTagNameReplacements = {
        "altglyph": "altGlyph",
        "altglyphdef": "altGlyphDef",
        "altglyphitem": "altGlyphItem",
        "animatecolor": "animateColor",
        "animatemotion": "animateMotion",
        "animatetransform": "animateTransform",
        "clippath": "clipPath",
        "feblend": "feBlend",
        "fecolormatrix": "feColorMatrix",
        "fecomponenttransfer": "feComponentTransfer",
        "fecomposite": "feComposite",
        "feconvolvematrix": "feConvolveMatrix",
        "fediffuselighting": "feDiffuseLighting",
        "fedisplacementmap": "feDisplacementMap",
        "fedistantlight": "feDistantLight",
        "feflood": "feFlood",
        "fefunca": "feFuncA",
        "fefuncb": "feFuncB",
        "fefuncg": "feFuncG",
        "fefuncr": "feFuncR",
        "fegaussianblur": "feGaussianBlur",
        "feimage": "feImage",
        "femerge": "feMerge",
        "femergenode": "feMergeNode",
        "femorphology": "feMorphology",
        "feoffset": "feOffset",
        "fepointlight": "fePointLight",
        "fespecularlighting": "feSpecularLighting",
        "fespotlight": "feSpotLight",
        "fetile": "feTile",
        "feturbulence": "feTurbulence",
        "foreignobject": "foreignObject",
        "glyphref": "glyphRef",
        "lineargradient": "linearGradient",
        "radialgradient": "radialGradient",
        "textpath": "textPath",
    }

    def adjustSVGTagNames(self, token):
        """Rewrite ``token["name"]`` in place to its mixed-case SVG spelling.

        Names without an entry in ``svgTagNameReplacements`` are left
        untouched.
        """
        replacements = self.svgTagNameReplacements
        if token["name"] in replacements:
            token["name"] = replacements[token["name"]]

    def processCharacters(self, token):
        """Insert character data.

        A lone NUL is replaced by U+FFFD; any other data that contains a
        non-space character clears ``parser.framesetOK``.
        """
        if token["data"] == "\u0000":
            token["data"] = "\uFFFD"
        elif (self.parser.framesetOK and
              any(char not in spaceCharacters for char in token["data"])):
            self.parser.framesetOK = False
        Phase.processCharacters(self, token)

    def processStartTag(self, token):
        """Insert a foreign element, or break out to HTML parsing.

        A breakout element (or ``font`` carrying a ``color``, ``face`` or
        ``size`` attribute) is a parse error: open elements are popped until
        the current node is in the default namespace or is an integration
        point, and the token is returned for reprocessing.  Any other tag is
        inserted in the namespace of the current node after its name and
        attributes have been adjusted.
        """
        currentNode = self.tree.openElements[-1]
        if (token["name"] in self.breakoutElements or
            (token["name"] == "font" and
             set(token["data"].keys()) & {"color", "face", "size"})):
            self.parser.parseError("unexpected-html-element-in-foreign-content",
                                   {"name": token["name"]})
            while (self.tree.openElements[-1].namespace !=
                   self.tree.defaultNamespace and
                   not self.parser.isHTMLIntegrationPoint(self.tree.openElements[-1]) and
                   not self.parser.isMathMLTextIntegrationPoint(self.tree.openElements[-1])):
                self.tree.openElements.pop()
            return token

        else:
            if currentNode.namespace == namespaces["mathml"]:
                self.parser.adjustMathMLAttributes(token)
            elif currentNode.namespace == namespaces["svg"]:
                self.adjustSVGTagNames(token)
                self.parser.adjustSVGAttributes(token)
            self.parser.adjustForeignAttributes(token)
            token["namespace"] = currentNode.namespace
            self.tree.insertElement(token)
            if token["selfClosing"]:
                # A self-closing foreign element never stays open
                self.tree.openElements.pop()
                token["selfClosingAcknowledged"] = True

    def processEndTag(self, token):
        """Close the matching foreign element, or defer to the current phase.

        The stack of open elements is walked from the current node downwards,
        comparing ASCII-lower-cased node names with the tag name.  On a match
        everything up to and including that node is popped and ``None`` is
        returned.  If an element in the default namespace is reached first,
        the token is passed to ``parser.phase.processEndTag`` and that
        result is returned.
        """
        nodeIndex = len(self.tree.openElements) - 1
        node = self.tree.openElements[-1]
        # Only the current node is checked for the mismatch error
        if node.name.translate(asciiUpper2Lower) != token["name"]:
            self.parser.parseError("unexpected-end-tag", {"name": token["name"]})

        while True:
            if node.name.translate(asciiUpper2Lower) == token["name"]:
                # XXX this isn't in the spec but it seems necessary
                if self.parser.phase == self.parser.phases["inTableText"]:
                    self.parser.phase.flushCharacters()
                    self.parser.phase = self.parser.phase.originalPhase
                while self.tree.openElements.pop() != node:
                    assert self.tree.openElements
                new_token = None
                break
            nodeIndex -= 1

            node = self.tree.openElements[nodeIndex]
            if node.namespace != self.tree.defaultNamespace:
                continue
            else:
                new_token = self.parser.phase.processEndTag(token)
                break
        return new_token


class AfterBodyPhase(Phase):
    """Phase handling tokens under the "after-body" rules.

    Comments are attached to the first element on the open-element stack and
    an ``html`` end tag moves the parser to ``afterAfterBody`` (or is an error
    when ``parser.innerHTML`` is set).  Characters and any other start or end
    tag are parse errors that switch the parser back to ``inBody`` and return
    the token to the caller -- presumably so it is reprocessed there.
    """
    __slots__ = ()

    def processEOF(self):
        """End of input: nothing to do, parsing simply stops."""

    def processComment(self, token):
        """Insert the comment into the first element on the open stack."""
        # The comment must be appended to the <html> element here, not to
        # whichever element is currently open.
        root = self.tree.openElements[0]
        self.tree.insertComment(token, root)

    def processCharacters(self, token):
        """Report the stray characters, switch to ``inBody``, return token."""
        parser = self.parser
        parser.parseError("unexpected-char-after-body")
        parser.phase = parser.phases["inBody"]
        return token

    def startTagHtml(self, token):
        """Delegate an ``html`` start tag to the ``inBody`` phase."""
        in_body = self.parser.phases["inBody"]
        return in_body.processStartTag(token)

    def startTagOther(self, token):
        """Report the start tag, switch to ``inBody``, return the token."""
        parser = self.parser
        details = {"name": token["name"]}
        parser.parseError("unexpected-start-tag-after-body", details)
        parser.phase = parser.phases["inBody"]
        return token

    def endTagHtml(self, name):
        """Handle ``</html>``: error if ``innerHTML`` is set, else advance.

        NOTE(review): ``name`` is registered as an end tag handler like the
        ``token``-taking siblings, so it presumably receives the token; the
        parameter is unused here.
        """
        parser = self.parser
        if not parser.innerHTML:
            parser.phase = parser.phases["afterAfterBody"]
            return
        parser.parseError("unexpected-end-tag-after-body-innerhtml")

    def endTagOther(self, token):
        """Report the end tag, switch to ``inBody``, return the token."""
        parser = self.parser
        details = {"name": token["name"]}
        parser.parseError("unexpected-end-tag-after-body", details)
        parser.phase = parser.phases["inBody"]
        return token

    # Dispatch tables: tag name -> handler, with a catch-all default.
    startTagHandler = _utils.MethodDispatcher([("html", startTagHtml)])
    startTagHandler.default = startTagOther

    endTagHandler = _utils.MethodDispatcher([
        ("html", endTagHtml)
    ])
    endTagHandler.default = endTagOther


class InFramesetPhase(Phase):
    """Phase handling tokens under the "in-frameset" rules.

    ``frameset`` start tags are inserted, ``frame`` start tags are inserted
    and popped immediately, ``html`` and ``noframes`` start tags are
    delegated, and everything else is reported as a parse error and dropped.
    """
    # http://www.whatwg.org/specs/web-apps/current-work/#in-frameset
    __slots__ = tuple()

    def processEOF(self):
        """Report EOF unless the current node is the ``html`` element."""
        if self.tree.openElements[-1].name != "html":
            self.parser.parseError("eof-in-frameset")
        else:
            # With only <html> left as the current node, the parser is
            # expected to be in innerHTML (fragment) mode.
            assert self.parser.innerHTML

    def processCharacters(self, token):
        """Report the characters as a parse error; the token is dropped."""
        self.parser.parseError("unexpected-char-in-frameset")

    def startTagFrameset(self, token):
        """Insert a nested ``frameset`` element and leave it open."""
        self.tree.insertElement(token)

    def startTagFrame(self, token):
        """Insert a ``frame`` element and pop it straight off the stack.

        The token's self-closing flag is acknowledged so that ``<frame/>``
        is not left as an unacknowledged self-closing start tag.
        """
        self.tree.insertElement(token)
        self.tree.openElements.pop()
        # The "in frameset" insertion mode acknowledges the self-closing
        # flag for "frame"; without this the flag stays unacknowledged.
        token["selfClosingAcknowledged"] = True

    def startTagNoframes(self, token):
        """Delegate a ``noframes`` start tag to the ``inBody`` phase."""
        return self.parser.phases["inBody"].processStartTag(token)

    def startTagOther(self, token):
        """Report any other start tag as a parse error and drop it."""
        self.parser.parseError("unexpected-start-tag-in-frameset",
                               {"name": token["name"]})

    def endTagFrameset(self, token):
        """Close the current ``frameset`` and possibly leave this phase."""
        if self.tree.openElements[-1].name == "html":
            # innerHTML case
            self.parser.parseError("unexpected-frameset-in-frameset-innerhtml")
        else:
            self.tree.openElements.pop()
        if (not self.parser.innerHTML and
                self.tree.openElements[-1].name != "frameset"):
            # If we're not in innerHTML mode and the current node is not a
            # "frameset" element (anymore) then switch.
            self.parser.phase = self.parser.phases["afterFrameset"]

    def endTagOther(self, token):
        """Report any other end tag as a parse error and drop it."""
        self.parser.parseError("unexpected-end-tag-in-frameset",
                               {"name": token["name"]})

    # Dispatch tables: tag name -> handler, with a catch-all default.
    startTagHandler = _utils.MethodDispatcher([
        ("html", Phase.startTagHtml),
        ("frameset", startTagFrameset),
        ("frame", startTagFrame),
        ("noframes", startTagNoframes)
    ])
    startTagHandler.default = startTagOther

    endTagHandler = _utils.MethodDispatcher([
        ("frameset", endTagFrameset)
    ])
    endTagHandler.default = endTagOther


class AfterFramesetPhase(Phase):
    """Phase handling tokens under the "after-frameset" rules.

    ``html`` and ``noframes`` start tags are delegated, an ``html`` end tag
    moves the parser to ``afterAfterFrameset``, and everything else is
    reported as a parse error and dropped.
    """
    # http://www.whatwg.org/specs/web-apps/current-work/#after3
    __slots__ = ()

    def processEOF(self):
        """End of input: nothing to do, parsing simply stops."""

    def processCharacters(self, token):
        """Report the characters as a parse error; the token is dropped."""
        self.parser.parseError("unexpected-char-after-frameset")

    def startTagNoframes(self, token):
        """Delegate a ``noframes`` start tag to the ``inHead`` phase."""
        in_head = self.parser.phases["inHead"]
        return in_head.processStartTag(token)

    def startTagOther(self, token):
        """Report any other start tag as a parse error and drop it."""
        details = {"name": token["name"]}
        self.parser.parseError("unexpected-start-tag-after-frameset", details)

    def endTagHtml(self, token):
        """Switch the parser to the ``afterAfterFrameset`` phase."""
        parser = self.parser
        parser.phase = parser.phases["afterAfterFrameset"]

    def endTagOther(self, token):
        """Report any other end tag as a parse error and drop it."""
        details = {"name": token["name"]}
        self.parser.parseError("unexpected-end-tag-after-frameset", details)

    # Dispatch tables: tag name -> handler, with a catch-all default.
    startTagHandler = _utils.MethodDispatcher([
        ("html", Phase.startTagHtml),
        ("noframes", startTagNoframes),
    ])
    startTagHandler.default = startTagOther

    endTagHandler = _utils.MethodDispatcher([("html", endTagHtml)])
    endTagHandler.default = endTagOther


class AfterAfterBodyPhase(Phase):
    """Phase handling tokens under the "after-after-body" rules.

    Comments go on the document itself, space characters and ``html`` start
    tags are delegated to ``inBody``, and any other characters, start tag or
    end tag is a parse error that switches the parser back to ``inBody`` and
    returns the token to the caller -- presumably so it is reprocessed there.
    """
    __slots__ = ()

    def processEOF(self):
        """End of input: nothing to do."""

    def processComment(self, token):
        """Insert the comment as a child of the document."""
        document = self.tree.document
        self.tree.insertComment(token, document)

    def processSpaceCharacters(self, token):
        """Delegate space characters to the ``inBody`` phase."""
        in_body = self.parser.phases["inBody"]
        return in_body.processSpaceCharacters(token)

    def processCharacters(self, token):
        """Report the characters, switch to ``inBody``, return the token."""
        parser = self.parser
        parser.parseError("expected-eof-but-got-char")
        parser.phase = parser.phases["inBody"]
        return token

    def startTagHtml(self, token):
        """Delegate an ``html`` start tag to the ``inBody`` phase."""
        in_body = self.parser.phases["inBody"]
        return in_body.processStartTag(token)

    def startTagOther(self, token):
        """Report the start tag, switch to ``inBody``, return the token."""
        parser = self.parser
        details = {"name": token["name"]}
        parser.parseError("expected-eof-but-got-start-tag", details)
        parser.phase = parser.phases["inBody"]
        return token

    def processEndTag(self, token):
        """Report the end tag, switch to ``inBody``, return the token."""
        parser = self.parser
        details = {"name": token["name"]}
        parser.parseError("expected-eof-but-got-end-tag", details)
        parser.phase = parser.phases["inBody"]
        return token

    # Dispatch table: tag name -> handler, with a catch-all default.
    startTagHandler = _utils.MethodDispatcher([("html", startTagHtml)])
    startTagHandler.default = startTagOther


class AfterAfterFramesetPhase(Phase):
    """Phase handling tokens under the "after-after-frameset" rules.

    Comments go on the document itself; space characters, ``html`` and
    ``noframes`` start tags are delegated to other phases; any other
    characters, start tag or end tag is reported as a parse error and
    dropped without changing phase.
    """
    __slots__ = ()

    def processEOF(self):
        """End of input: nothing to do."""

    def processComment(self, token):
        """Insert the comment as a child of the document."""
        document = self.tree.document
        self.tree.insertComment(token, document)

    def processSpaceCharacters(self, token):
        """Delegate space characters to the ``inBody`` phase."""
        in_body = self.parser.phases["inBody"]
        return in_body.processSpaceCharacters(token)

    def processCharacters(self, token):
        """Report the characters as a parse error; the token is dropped."""
        self.parser.parseError("expected-eof-but-got-char")

    def startTagHtml(self, token):
        """Delegate an ``html`` start tag to the ``inBody`` phase."""
        in_body = self.parser.phases["inBody"]
        return in_body.processStartTag(token)

    def startTagNoFrames(self, token):
        """Delegate a ``noframes`` start tag to the ``inHead`` phase."""
        in_head = self.parser.phases["inHead"]
        return in_head.processStartTag(token)

    def startTagOther(self, token):
        """Report any other start tag as a parse error and drop it."""
        details = {"name": token["name"]}
        self.parser.parseError("expected-eof-but-got-start-tag", details)

    def processEndTag(self, token):
        """Report every end tag as a parse error and drop it."""
        details = {"name": token["name"]}
        self.parser.parseError("expected-eof-but-got-end-tag", details)

    # Dispatch table: tag name -> handler, with a catch-all default.
    startTagHandler = _utils.MethodDispatcher([
        ("html", startTagHtml),
        ("noframes", startTagNoFrames),
    ])
    startTagHandler.default = startTagOther

# pylint:enable=unused-argument


# Registry mapping each phase name to the class that implements it.  The keys
# match the names used in ``parser.phases[...]`` lookups throughout the phase
# classes -- presumably ``parser.phases`` is built from this table; verify
# against the parser's setup code.
_phases = {
    "initial": InitialPhase,
    "beforeHtml": BeforeHtmlPhase,
    "beforeHead": BeforeHeadPhase,
    "inHead": InHeadPhase,
    "inHeadNoscript": InHeadNoscriptPhase,
    "afterHead": AfterHeadPhase,
    "inBody": InBodyPhase,
    "text": TextPhase,
    "inTable": InTablePhase,
    "inTableText": InTableTextPhase,
    "inCaption": InCaptionPhase,
    "inColumnGroup": InColumnGroupPhase,
    "inTableBody": InTableBodyPhase,
    "inRow": InRowPhase,
    "inCell": InCellPhase,
    "inSelect": InSelectPhase,
    "inSelectInTable": InSelectInTablePhase,
    "inForeignContent": InForeignContentPhase,
    "afterBody": AfterBodyPhase,
    "inFrameset": InFramesetPhase,
    "afterFrameset": AfterFramesetPhase,
    "afterAfterBody": AfterAfterBodyPhase,
    "afterAfterFrameset": AfterAfterFramesetPhase,
    # XXX after after frameset
    # NOTE(review): the XXX marker above looks stale -- "afterAfterFrameset"
    # is registered on the preceding line; confirm and remove.
}


def adjust_attributes(token, replacements):
    """Rename attributes of ``token`` in place according to ``replacements``.

    :arg token: token whose ``token['data']`` is a mapping of attribute
        names to values

    :arg replacements: mapping of attribute name to its replacement name

    :returns: ``None``; when at least one attribute name appears in
        ``replacements``, ``token['data']`` is rebound to a new mapping of
        the same type with the names substituted and the order kept
    """
    attrs = token['data']
    if not (attrs.keys() & replacements.keys()):
        # No attribute needs renaming: leave the existing mapping in place.
        return
    renamed = ((replacements.get(name, name), value)
               for name, value in attrs.items())
    token['data'] = type(attrs)(renamed)


def impliedTagToken(name, type="EndTag", attributes=None,
                    selfClosing=False):
    """Build a tag token dict that did not come from the tokenizer.

    :arg name: value stored under the token's ``"name"`` key

    :arg type: key looked up in ``tokenTypes`` to obtain the token's
        ``"type"`` value

    :arg attributes: value stored under ``"data"``; a fresh empty dict is
        used when this is ``None``

    :arg selfClosing: value stored under ``"selfClosing"``

    :returns: dict with the keys ``"type"``, ``"name"``, ``"data"`` and
        ``"selfClosing"``
    """
    token = {
        "type": tokenTypes[type],
        "name": name,
        # A new dict per call, so implied tokens never share attributes.
        "data": {} if attributes is None else attributes,
        "selfClosing": selfClosing,
    }
    return token


class ParseError(Exception):
    """Exception type representing an error in the document being parsed."""
