File: clean.pyi

package info (click to toggle)
lxml-html-clean 0.4.2-1
links: PTS
area: main
in suites: forky, sid, trixie
size: 228 kB
sloc: python: 865; makefile: 12
file content (113 lines) | stat: -rw-r--r-- 3,816 bytes
from typing import Collection, Iterable, Literal, Pattern, TypeVar, overload

from lxml.etree import _ElementTree
from lxml.html import HtmlElement

# Constrained TypeVars that tie a function's return type to the type of its
# input: whichever of the listed types is passed in is the type annotated as
# coming back out.
# _DT admits str, bytes and HtmlElement; _ET_DT additionally admits an
# _ElementTree[HtmlElement].
_DT = TypeVar("_DT", str, bytes, HtmlElement)
_ET_DT = TypeVar("_ET_DT", str, bytes, HtmlElement, _ElementTree[HtmlElement])


# Private helper: takes a URL string and yields either a string or None.
# NOTE(review): judging by the name, the returned string is presumably the
# authority component of the URL and None presumably means none could be
# determined -- confirm against the implementation module.
def _get_authority_from_url(url: str) -> str | None: ...


# Warning category declared by this module; derives directly from the
# built-in Warning and adds no members of its own.
class LXMLHTMLCleanWarning(Warning): ...


# Specialised warning category: a subclass of LXMLHTMLCleanWarning with no
# additional members.
# NOTE(review): by its name this is presumably raised for URLs that cannot be
# interpreted unambiguously -- confirm against the implementation module.
class AmbiguousURLWarning(LXMLHTMLCleanWarning): ...


class Cleaner:
    # Type stub for the Cleaner class.
    #
    # Every constructor option is keyword-only (note the bare ``*``).  The two
    # ``__init__`` overloads exist solely to encode how ``allow_tags`` and
    # ``remove_unknown_tags`` interact:
    #   * overload 1 accepts ``allow_tags`` and restricts
    #     ``remove_unknown_tags`` to ``Literal[False]``;
    #   * overload 2 has no ``allow_tags`` parameter and accepts any bool for
    #     ``remove_unknown_tags``.
    # All other options are typed identically in both overloads, including
    # ``whitelist_tags``, which may be a collection of tag names or ``None``.
    # ``safe_attrs`` uses ``...`` as its default: the concrete value is defined
    # by the implementation and is not spelled out in this stub.
    @overload  # allow_tags present, remove_unknown_tags must be False
    def __init__(
        self,
        *,
        scripts: bool = True,
        javascript: bool = True,
        comments: bool = True,
        style: bool = False,
        inline_style: bool | None = None,
        links: bool = True,
        meta: bool = True,
        page_structure: bool = True,
        processing_instructions: bool = True,
        embedded: bool = True,
        frames: bool = True,
        forms: bool = True,
        annoying_tags: bool = True,
        remove_tags: Collection[str] = (),
        allow_tags: Collection[str] = (),
        kill_tags: Collection[str] = (),
        remove_unknown_tags: Literal[False] = False,
        safe_attrs_only: bool = True,
        safe_attrs: Collection[str] = ...,
        add_nofollow: bool = False,
        host_whitelist: Collection[str] = (),
        whitelist_tags: Collection[str] | None = {"iframe", "embed"},
    ) -> None: ...
    @overload  # ... otherwise, allow_tags must not be used
    def __init__(
        self,
        *,
        scripts: bool = True,
        javascript: bool = True,
        comments: bool = True,
        style: bool = False,
        inline_style: bool | None = None,
        links: bool = True,
        meta: bool = True,
        page_structure: bool = True,
        processing_instructions: bool = True,
        embedded: bool = True,
        frames: bool = True,
        forms: bool = True,
        annoying_tags: bool = True,
        remove_tags: Collection[str] = (),
        kill_tags: Collection[str] = (),
        remove_unknown_tags: bool = True,
        safe_attrs_only: bool = True,
        safe_attrs: Collection[str] = ...,
        add_nofollow: bool = False,
        host_whitelist: Collection[str] = (),
        # None is accepted here exactly as in the first overload: whether
        # allow_tags is available has no bearing on whitelist_tags.
        whitelist_tags: Collection[str] | None = {"iframe", "embed"},
    ) -> None: ...
    # Calling the instance takes an element or an element tree and returns
    # None.  NOTE(review): presumably the document is cleaned in place --
    # confirm against the implementation.
    def __call__(self, doc: HtmlElement | _ElementTree[HtmlElement]) -> None: ...
    # Boolean predicates over a single element (and, for
    # allow_embedded_url, a URL string associated with it).
    def allow_follow(self, anchor: HtmlElement) -> bool: ...
    def allow_element(self, el: HtmlElement) -> bool: ...
    def allow_embedded_url(self, el: HtmlElement, url: str) -> bool: ...
    # Takes an element or an element tree and returns None.
    def kill_conditional_comments(self, doc: HtmlElement | _ElementTree[HtmlElement]) -> None: ...
    # The return type mirrors the argument type via _ET_DT: str -> str,
    # bytes -> bytes, HtmlElement -> HtmlElement, element tree -> element tree.
    def clean_html(self, html: _ET_DT) -> _ET_DT: ...

# Module-level Cleaner object, and a module-level alias for its bound
# clean_html method so that clean_html(...) can be called directly.
clean: Cleaner
clean_html = clean.clean_html

# Operates on an HtmlElement and returns None.
# NOTE(review): presumably the element is modified in place, turning matching
# text into links -- confirm against the implementation.
#   link_regexes / avoid_hosts: iterables of compiled str patterns.
#   avoid_elements / avoid_classes: collections of strings.
# Defaults written as ``...`` are defined by the implementation and are not
# spelled out in this stub.
def autolink(
    el: HtmlElement,
    link_regexes: Iterable[Pattern[str]] = ...,
    avoid_elements: Collection[str] = ...,
    avoid_hosts: Iterable[Pattern[str]] = ...,
    avoid_classes: Collection[str] = ["nolink"],
) -> None: ...
# Takes the same options as autolink, but accepts str, bytes or an
# HtmlElement and returns a value; the _DT TypeVar makes the annotated
# return type equal to the type of ``html``.
# Defaults written as ``...`` are defined by the implementation and are not
# spelled out in this stub.
def autolink_html(
    html: _DT,
    link_regexes: Iterable[Pattern[str]] = ...,
    avoid_elements: Collection[str] = ...,
    avoid_hosts: Iterable[Pattern[str]] = ...,
    avoid_classes: Collection[str] = ["nolink"],
) -> _DT: ...
# Operates on an HtmlElement and returns None.
# NOTE(review): presumably inserts ``break_character`` into words longer than
# ``max_width`` characters, modifying the element in place -- confirm against
# the implementation.
#   max_width: integer threshold, 40 by default.
#   avoid_elements / avoid_classes: collections of strings; the defaults are
#     the tag names pre/textarea/code and the class name "nobreak".
#   break_character: defaults to chr(0x200B), i.e. U+200B ZERO WIDTH SPACE.
def word_break(
    el: HtmlElement,
    max_width: int = 40,
    avoid_elements: Collection[str] = ["pre", "textarea", "code"],
    avoid_classes: Collection[str] = ["nobreak"],
    break_character: str = chr(0x200B),
) -> None: ...
# Takes the same options and defaults as word_break, but accepts str, bytes
# or an HtmlElement and returns a value; the _DT TypeVar makes the annotated
# return type equal to the type of ``html``.
# break_character defaults to chr(0x200B), i.e. U+200B ZERO WIDTH SPACE.
def word_break_html(
    html: _DT,
    max_width: int = 40,
    avoid_elements: Collection[str] = ["pre", "textarea", "code"],
    avoid_classes: Collection[str] = ["nobreak"],
    break_character: str = chr(0x200B),
) -> _DT: ...