1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113
|
from typing import Collection, Iterable, Literal, Pattern, TypeVar, overload
from lxml.etree import _ElementTree
from lxml.html import HtmlElement
# Type variables tying a function's return type to its argument type: a
# function annotated with one of these hands back the same kind of object
# (str, bytes, element, ...) that it was given.
_DT = TypeVar(
    "_DT",
    str,
    bytes,
    HtmlElement,
)
# Same constraints as _DT, plus a whole element tree of HtmlElement.
_ET_DT = TypeVar(
    "_ET_DT",
    str,
    bytes,
    HtmlElement,
    _ElementTree[HtmlElement],
)
def _get_authority_from_url(url: str) -> str | None: ...
class LXMLHTMLCleanWarning(Warning):
    """Root warning category declared by this module.

    Adds nothing to ``Warning`` itself; ``AmbiguousURLWarning`` derives from
    it, so a warnings filter on this class covers that subclass as well.
    """
class AmbiguousURLWarning(LXMLHTMLCleanWarning):
    """Warning category specialising ``LXMLHTMLCleanWarning``.

    NOTE(review): presumably issued when a URL cannot be interpreted
    unambiguously -- confirm against the implementation.
    """
class Cleaner:
    """Typing stub for the configurable HTML cleaner.

    Every constructor option is keyword-only.  Two ``__init__`` overloads
    encode the single cross-option constraint:

    * first overload  -- ``allow_tags`` is available, and then
      ``remove_unknown_tags`` must be ``False``;
    * second overload -- ``remove_unknown_tags`` may be any ``bool``, and
      ``allow_tags`` is not offered.

    ``whitelist_tags`` is unrelated to that constraint and accepts ``None``
    in both overloads (upstream documents ``None`` as "include all tags").
    ``safe_attrs`` has its default elided (``...``) in this stub.
    """

    @overload  # allow_tags present, remove_unknown_tags must be False
    def __init__(
        self,
        *,
        scripts: bool = True,
        javascript: bool = True,
        comments: bool = True,
        style: bool = False,
        inline_style: bool | None = None,
        links: bool = True,
        meta: bool = True,
        page_structure: bool = True,
        processing_instructions: bool = True,
        embedded: bool = True,
        frames: bool = True,
        forms: bool = True,
        annoying_tags: bool = True,
        remove_tags: Collection[str] = (),
        allow_tags: Collection[str] = (),
        kill_tags: Collection[str] = (),
        remove_unknown_tags: Literal[False] = False,
        safe_attrs_only: bool = True,
        safe_attrs: Collection[str] = ...,
        add_nofollow: bool = False,
        host_whitelist: Collection[str] = (),
        whitelist_tags: Collection[str] | None = {"iframe", "embed"},
    ) -> None: ...
    @overload  # ... otherwise, allow_tags must not be used
    def __init__(
        self,
        *,
        scripts: bool = True,
        javascript: bool = True,
        comments: bool = True,
        style: bool = False,
        inline_style: bool | None = None,
        links: bool = True,
        meta: bool = True,
        page_structure: bool = True,
        processing_instructions: bool = True,
        embedded: bool = True,
        frames: bool = True,
        forms: bool = True,
        annoying_tags: bool = True,
        remove_tags: Collection[str] = (),
        kill_tags: Collection[str] = (),
        remove_unknown_tags: bool = True,
        safe_attrs_only: bool = True,
        safe_attrs: Collection[str] = ...,
        add_nofollow: bool = False,
        host_whitelist: Collection[str] = (),
        # None must be accepted here too, exactly as in the first overload;
        # otherwise whitelist_tags=None is rejected whenever allow_tags is
        # not part of the call.
        whitelist_tags: Collection[str] | None = {"iframe", "embed"},
    ) -> None: ...
    def __call__(self, doc: HtmlElement | _ElementTree[HtmlElement]) -> None:
        """Apply the cleaner to an element or element tree.

        Returns ``None``, so any effect is by mutation of ``doc``.
        """
        ...
    def allow_follow(self, anchor: HtmlElement) -> bool:
        """Decide, per anchor element, a yes/no "follow" question.

        NOTE(review): presumably consulted when ``add_nofollow`` is enabled
        -- confirm against the implementation.
        """
        ...
    def allow_element(self, el: HtmlElement) -> bool:
        """Return a yes/no verdict for a single element.

        NOTE(review): presumably tied to ``host_whitelist`` /
        ``whitelist_tags`` -- confirm against the implementation.
        """
        ...
    def allow_embedded_url(self, el: HtmlElement, url: str) -> bool:
        """Return a yes/no verdict for ``url`` as used by element ``el``.

        NOTE(review): presumably checks the URL's host against
        ``host_whitelist`` -- confirm against the implementation.
        """
        ...
    def kill_conditional_comments(self, doc: HtmlElement | _ElementTree[HtmlElement]) -> None:
        """Process conditional comments in ``doc``; returns ``None``.

        NOTE(review): the name suggests they are removed in place -- confirm.
        """
        ...
    def clean_html(self, html: _ET_DT) -> _ET_DT:
        """Clean ``html`` and return a result of the same type as the input.

        The ``_ET_DT`` type variable links argument and return type: str in,
        str out; bytes in, bytes out; element or element tree likewise.
        """
        ...
# Module-level Cleaner instance.  Declared only: this stub does not show how
# the instance is configured.
clean: Cleaner
# Module-level alias for the bound clean_html method of the instance above.
clean_html = clean.clean_html
def autolink(
    el: HtmlElement,
    link_regexes: Iterable[Pattern[str]] = ...,
    avoid_elements: Collection[str] = ...,
    avoid_hosts: Iterable[Pattern[str]] = ...,
    avoid_classes: Collection[str] = ["nolink"],
) -> None:
    """Element-level autolinker (signature only).

    Returns ``None``, so any result is delivered by mutating ``el``.
    NOTE(review): presumably text matching ``link_regexes`` is turned into
    links, skipping the ``avoid_*`` matches -- confirm against the
    implementation.

    Defaults shown as ``...`` are elided in this stub; ``avoid_classes``
    defaults to ``["nolink"]``.
    """
    ...
def autolink_html(
    html: _DT,
    link_regexes: Iterable[Pattern[str]] = ...,
    avoid_elements: Collection[str] = ...,
    avoid_hosts: Iterable[Pattern[str]] = ...,
    avoid_classes: Collection[str] = ["nolink"],
) -> _DT:
    """Value-returning counterpart of ``autolink`` (signature only).

    Takes the same keyword options as ``autolink`` but, instead of returning
    ``None``, returns an object of the same type as ``html`` (str, bytes or
    element, as constrained by ``_DT``).

    Defaults shown as ``...`` are elided in this stub; ``avoid_classes``
    defaults to ``["nolink"]``.
    """
    ...
def word_break(
    el: HtmlElement,
    max_width: int = 40,
    avoid_elements: Collection[str] = ["pre", "textarea", "code"],
    avoid_classes: Collection[str] = ["nobreak"],
    break_character: str = chr(0x200B),
) -> None:
    """Element-level word breaker (signature only).

    Returns ``None``, so any result is delivered by mutating ``el``.
    NOTE(review): presumably inserts ``break_character`` into words longer
    than ``max_width``, skipping the ``avoid_*`` matches -- confirm against
    the implementation.

    Declared defaults: ``max_width`` 40; ``avoid_elements`` ``pre``,
    ``textarea`` and ``code``; ``avoid_classes`` ``nobreak``;
    ``break_character`` U+200B (zero width space).
    """
    ...
def word_break_html(
    html: _DT,
    max_width: int = 40,
    avoid_elements: Collection[str] = ["pre", "textarea", "code"],
    avoid_classes: Collection[str] = ["nobreak"],
    break_character: str = chr(0x200B),
) -> _DT:
    """Value-returning counterpart of ``word_break`` (signature only).

    Takes the same options and defaults as ``word_break`` but, instead of
    returning ``None``, returns an object of the same type as ``html`` (str,
    bytes or element, as constrained by ``_DT``).
    """
    ...
|