# This file is part of EbookLib.
# Copyright (c) 2013 Aleksandar Erkalovic <aerkalov@gmail.com>
#
# EbookLib is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# EbookLib is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with EbookLib.  If not, see <http://www.gnu.org/licenses/>.

import six

from ebooklib.plugins.base import BasePlugin
from ebooklib.utils import parse_html_string

# TODO:
#   - should also look for the _required_ elements
# http://www.w3.org/html/wg/drafts/html/master/tabular-data.html#the-table-element

ATTRIBUTES_GLOBAL = [
    "accesskey",
    "class",
    "contenteditable",
    "contextmenu",
    "dir",
    "draggable",
    "dropzone",
    "hidden",
    "id",
    "inert",
    "itemid",
    "itemprop",
    "itemref",
    "itemscope",
    "itemtype",
    "lang",
    "spellcheck",
    "style",
    "tabindex",
    "title",
    "translate",
    "epub:type",
]

# Remove <u> for now from here
DEPRECATED_TAGS = [
    "acronym",
    "applet",
    "basefont",
    "big",
    "center",
    "dir",
    "font",
    "frame",
    "frameset",
    "isindex",
    "noframes",
    "s",
    "strike",
    "tt",
]


def leave_only(item, tag_list):
    for _attr in six.iterkeys(item.attrib):
        if _attr not in tag_list:
            del item.attrib[_attr]


class SyntaxPlugin(BasePlugin):
    NAME = "Check HTML syntax"

    def html_before_write(self, book, chapter):
        from lxml import etree

        try:
            tree = parse_html_string(chapter.content)
        except Exception:
            return

        root = tree.getroottree()

        # delete deprecated tags
        # i should really have a list of allowed tags
        for tag in DEPRECATED_TAGS:
            etree.strip_tags(root, tag)

        head = tree.find("head")

        if head is not None and len(head) != 0:
            for _item in head:
                if _item.tag == "base":
                    leave_only(_item, ATTRIBUTES_GLOBAL + ["href", "target"])
                elif _item.tag == "link":
                    leave_only(
                        _item, ATTRIBUTES_GLOBAL + ["href", "crossorigin", "rel", "media", "hreflang", "type", "sizes"]
                    )
                elif _item.tag == "title":
                    if _item.text == "":
                        head.remove(_item)
                elif _item.tag == "meta":
                    leave_only(_item, ATTRIBUTES_GLOBAL + ["name", "http-equiv", "content", "charset"])
                    # just remove for now, but really should not be like this
                    head.remove(_item)
                elif _item.tag == "script":
                    leave_only(_item, ATTRIBUTES_GLOBAL + ["src", "type", "charset", "async", "defer", "crossorigin"])
                elif _item.tag == "source":
                    leave_only(_item, ATTRIBUTES_GLOBAL + ["src", "type", "media"])
                elif _item.tag == "style":
                    leave_only(_item, ATTRIBUTES_GLOBAL + ["media", "type", "scoped"])
                else:
                    leave_only(_item, ATTRIBUTES_GLOBAL)

        if len(root.find("body")) != 0:
            body = tree.find("body")

            for _item in body.iter():
                # it is not
                # <a class="indexterm" href="ch05.html#ix_epub:trigger_element">

                if _item.tag == "a":
                    leave_only(_item, ATTRIBUTES_GLOBAL + ["href", "target", "download", "rel", "hreflang", "type"])
                elif _item.tag == "area":
                    leave_only(
                        _item,
                        ATTRIBUTES_GLOBAL
                        + ["alt", "coords", "shape", "href", "target", "download", "rel", "hreflang", "type"],
                    )
                elif _item.tag == "audio":
                    leave_only(
                        _item,
                        ATTRIBUTES_GLOBAL
                        + ["src", "crossorigin", "preload", "autoplay", "mediagroup", "loop", "muted", "controls"],
                    )
                elif _item.tag == "blockquote":
                    leave_only(_item, ATTRIBUTES_GLOBAL + ["cite"])
                elif _item.tag == "button":
                    leave_only(
                        _item,
                        ATTRIBUTES_GLOBAL
                        + [
                            "autofocus",
                            "disabled",
                            "form",
                            "formaction",
                            "formenctype",
                            "formmethod",
                            "formnovalidate",
                            "formtarget",
                            "name",
                            "type",
                            "value",
                            "menu",
                        ],
                    )
                elif _item.tag == "canvas":
                    leave_only(_item, ATTRIBUTES_GLOBAL + ["width", "height"])
                elif _item.tag == "canvas":
                    leave_only(_item, ATTRIBUTES_GLOBAL + ["width", "height"])
                elif _item.tag == "del":
                    leave_only(_item, ATTRIBUTES_GLOBAL + ["cite", "datetime"])
                elif _item.tag == "details":
                    leave_only(_item, ATTRIBUTES_GLOBAL + ["open"])
                elif _item.tag == "embed":
                    leave_only(_item, ATTRIBUTES_GLOBAL + ["src", "type", "width", "height"])
                elif _item.tag == "fieldset":
                    leave_only(_item, ATTRIBUTES_GLOBAL + ["disable", "form", "name"])
                elif _item.tag == "details":
                    leave_only(
                        _item,
                        ATTRIBUTES_GLOBAL
                        + [
                            "accept-charset",
                            "action",
                            "autocomplete",
                            "enctype",
                            "method",
                            "name",
                            "novalidate",
                            "target",
                        ],
                    )
                elif _item.tag == "iframe":
                    leave_only(
                        _item,
                        ATTRIBUTES_GLOBAL
                        + ["src", "srcdoc", "name", "sandbox", "seamless", "allowfullscreen", "width", "height"],
                    )
                elif _item.tag == "img":
                    _src = _item.get("src", "").lower()
                    if _src.startswith("http://") or _src.startswith("https://"):
                        if "remote-resources" not in chapter.properties:
                            chapter.properties.append("remote-resources")
                            # THIS DOES NOT WORK, ONLY VIDEO AND AUDIO FILES CAN BE REMOTE RESOURCES
                            # THAT MEANS I SHOULD ALSO CATCH <SOURCE TAG
                            from ebooklib import epub

                            _img = epub.EpubImage(file_name=_item.get("src"))
                            book.add_item(_img)
                    leave_only(
                        _item, ATTRIBUTES_GLOBAL + ["alt", "src", "crossorigin", "usemap", "ismap", "width", "height"]
                    )
                elif _item.tag == "input":
                    leave_only(
                        _item,
                        ATTRIBUTES_GLOBAL
                        + [
                            "accept",
                            "alt",
                            "autocomplete",
                            "autofocus",
                            "checked",
                            "dirname",
                            "disabled",
                            "form",
                            "formaction",
                            "formenctype",
                            "formmethod",
                            "formnovalidate",
                            "formtarget",
                            "height",
                            "inputmode",
                            "list",
                            "max",
                            "maxlength",
                            "min",
                            "multiple",
                            "name",
                            "pattern",
                            "placeholder",
                            "readonly",
                            "required",
                            "size",
                            "src",
                            "steptype",
                            "value",
                            "width",
                        ],
                    )
                elif _item.tag == "ins":
                    leave_only(_item, ATTRIBUTES_GLOBAL + ["cite", "datetime"])
                elif _item.tag == "keygen":
                    leave_only(
                        _item, ATTRIBUTES_GLOBAL + ["autofocus", "challenge", "disabled", "form", "keytype", "name"]
                    )
                elif _item.tag == "label":
                    leave_only(_item, ATTRIBUTES_GLOBAL + ["form", "for"])
                elif _item.tag == "label":
                    leave_only(_item, ATTRIBUTES_GLOBAL + ["form", "for"])
                elif _item.tag == "map":
                    leave_only(_item, ATTRIBUTES_GLOBAL + ["name"])
                elif _item.tag == "menu":
                    leave_only(_item, ATTRIBUTES_GLOBAL + ["type", "label"])
                elif _item.tag == "object":
                    leave_only(
                        _item,
                        ATTRIBUTES_GLOBAL
                        + ["data", "type", "typemustmatch", "name", "usemap", "form", "width", "height"],
                    )
                elif _item.tag == "ol":
                    leave_only(_item, ATTRIBUTES_GLOBAL + ["reversed", "start", "type"])
                elif _item.tag == "optgroup":
                    leave_only(_item, ATTRIBUTES_GLOBAL + ["disabled", "label"])
                elif _item.tag == "option":
                    leave_only(_item, ATTRIBUTES_GLOBAL + ["disabled", "label", "selected", "value"])
                elif _item.tag == "output":
                    leave_only(_item, ATTRIBUTES_GLOBAL + ["for", "form", "name"])
                elif _item.tag == "param":
                    leave_only(_item, ATTRIBUTES_GLOBAL + ["name", "value"])
                elif _item.tag == "progress":
                    leave_only(_item, ATTRIBUTES_GLOBAL + ["value", "max"])
                elif _item.tag == "q":
                    leave_only(_item, ATTRIBUTES_GLOBAL + ["cite"])
                elif _item.tag == "select":
                    leave_only(
                        _item,
                        ATTRIBUTES_GLOBAL + ["autofocus", "disabled", "form", "multiple", "name", "required", "size"],
                    )

                elif _item.tag == "table":
                    if _item.get("border", None):
                        if _item.get("border") == "0":
                            _item.set("border", "")

                    if _item.get("summary", None):
                        _caption = etree.Element("caption", {})
                        _caption.text = _item.get("summary")
                        _item.insert(0, _caption)

                        # add it as caption
                        del _item.attrib["summary"]

                    leave_only(_item, ATTRIBUTES_GLOBAL + ["border", "sortable"])
                elif _item.tag == "dl":
                    _d = _item.find("dd")
                    if _d is not None and len(_d) == 0:
                        pass

                        # http://html5doctor.com/the-dl-element/
                        # should be like this really
                        # some of the elements can be missing
                        # dl
                        #   dt
                        #   dd
                        #   dt
                        #   dd
                elif _item.tag == "td":
                    leave_only(_item, ATTRIBUTES_GLOBAL + ["colspan", "rowspan", "headers"])
                elif _item.tag == "textarea":
                    leave_only(
                        _item,
                        ATTRIBUTES_GLOBAL
                        + [
                            "autocomplete",
                            "autofocus",
                            "cols",
                            "dirname",
                            "disabled",
                            "form",
                            "inputmode",
                            "maxlength",
                            "name",
                            "placeholder",
                            "readonly",
                            "required",
                            "rows",
                            "wrap",
                        ],
                    )

                elif _item.tag in ["col", "colgroup"]:
                    leave_only(_item, ATTRIBUTES_GLOBAL + ["span"])
                elif _item.tag == "th":
                    leave_only(_item, ATTRIBUTES_GLOBAL + ["colspan", "rowspan", "headers", "scope", "abbr", "sorted"])
                elif _item.tag in ["time"]:
                    leave_only(_item, ATTRIBUTES_GLOBAL + ["datetime"])
                elif _item.tag in ["track"]:
                    leave_only(_item, ATTRIBUTES_GLOBAL + ["kind", "src", "srclang", "label", "default"])
                elif _item.tag == "video":
                    leave_only(
                        _item,
                        ATTRIBUTES_GLOBAL
                        + [
                            "src",
                            "crossorigin",
                            "poster",
                            "preload",
                            "autoplay",
                            "mediagroup",
                            "loop",
                            "muted",
                            "controls",
                            "width",
                            "height",
                        ],
                    )
                elif _item.tag == "svg":
                    # We need to add property "svg" in case we have embeded svg file
                    if "svg" not in chapter.properties:
                        chapter.properties.append("svg")

                    if _item.get("viewbox", None):
                        del _item.attrib["viewbox"]

                    if _item.get("preserveaspectratio", None):
                        del _item.attrib["preserveaspectratio"]
                else:
                    for _attr in six.iterkeys(_item.attrib):
                        if _attr not in ATTRIBUTES_GLOBAL:
                            del _item.attrib[_attr]

        chapter.content = etree.tostring(tree, pretty_print=True, encoding="utf-8", xml_declaration=True)

        return chapter.content
