"""tldextract unit tests with a custom suffix list."""
import os
import tempfile
from pathlib import Path
import tldextract
from tldextract.tldextract import ExtractResult
# Location of the fake suffix list fixture, resolved relative to this module.
_FAKE_SUFFIX_LIST_PATH = (
    Path(os.path.dirname(os.path.abspath(__file__)))
    / "fixtures"
    / "fake_suffix_list_fixture.dat"
)

# ``file:`` URI form of the fixture path, passed to the extractors below as
# their only entry in ``suffix_list_urls``.
FAKE_SUFFIX_LIST_URL = _FAKE_SUFFIX_LIST_PATH.as_uri()

# Suffixes handed to ``extra_suffixes`` in addition to the fake list.
EXTRA_SUFFIXES = ["foo1", "bar1", "baz1"]

# Extractor backed by the fake list, caching into a new temporary directory.
extract_using_fake_suffix_list = tldextract.TLDExtract(
    suffix_list_urls=[FAKE_SUFFIX_LIST_URL],
    cache_dir=tempfile.mkdtemp(),
)

# Same fake list, but constructed with ``cache_dir=None``.
extract_using_fake_suffix_list_no_cache = tldextract.TLDExtract(
    suffix_list_urls=[FAKE_SUFFIX_LIST_URL],
    cache_dir=None,
)

# Fake list plus ``EXTRA_SUFFIXES``, also constructed with ``cache_dir=None``.
extract_using_extra_suffixes = tldextract.TLDExtract(
    suffix_list_urls=[FAKE_SUFFIX_LIST_URL],
    extra_suffixes=EXTRA_SUFFIXES,
    cache_dir=None,
)
def test_private_extraction() -> None:
    """Test this library's uncached, offline, private domain extraction.

    The extractor is created with an empty ``suffix_list_urls`` and a brand-new
    (hence empty) cache directory. The same host is extracted twice: once with
    default arguments and once with ``include_psl_private_domains=True``, and
    each result is compared against the expected ``ExtractResult``.
    """
    # Use a context-managed directory so the cache is removed when the test
    # ends; a bare ``tempfile.mkdtemp()`` leaves one directory behind per run.
    with tempfile.TemporaryDirectory() as cache_dir:
        tld = tldextract.TLDExtract(cache_dir=cache_dir, suffix_list_urls=[])

        # Default call: the final ``False`` is the expected private-domain flag
        # (presumably ``is_private`` -- confirm against ``ExtractResult``).
        assert tld("foo.blogspot.com") == ExtractResult(
            "foo", "blogspot", "com", False
        )

        # With private domains included, "blogspot.com" is expected as one unit.
        assert tld(
            "foo.blogspot.com", include_psl_private_domains=True
        ) == ExtractResult(
            "",
            "foo",
            "blogspot.com",
            True,
        )
def test_suffix_which_is_not_in_custom_list() -> None:
    """Check that ``.com`` yields an empty suffix with the custom list.

    The custom suffix list does not contain ``.com``; both extractors built
    from it are exercised.
    """
    extractors = (
        extract_using_fake_suffix_list,
        extract_using_fake_suffix_list_no_cache,
    )
    for extractor in extractors:
        assert extractor("www.google.com").suffix == ""
def test_custom_suffixes() -> None:
    """Check that the custom list's metasyntactic suffixes are recognized.

    Each of ``foo``, ``bar`` and ``baz`` is appended to a fixed host name and
    must come back as the suffix, for both extractors built from the list.
    """
    extractors = (
        extract_using_fake_suffix_list,
        extract_using_fake_suffix_list_no_cache,
    )
    expected_suffixes = ("foo", "bar", "baz")

    for extractor in extractors:
        for expected in expected_suffixes:
            hostname = ".".join(("www.foo.bar.baz.quux", expected))
            extracted = extractor(hostname)
            assert extracted.suffix == expected
def test_suffix_which_is_not_in_extra_list() -> None:
    """Check that ``.com`` yields an empty suffix even with extra suffixes.

    Neither the custom suffix list nor ``EXTRA_SUFFIXES`` covers ``.com``.
    """
    assert extract_using_extra_suffixes("www.google.com").suffix == ""
def test_extra_suffixes() -> None:
    """Check that every entry of ``EXTRA_SUFFIXES`` is extracted as a suffix."""
    for expected in EXTRA_SUFFIXES:
        hostname = ".".join(("www.foo.bar.baz.quux", expected))
        extracted = extract_using_extra_suffixes(hostname)
        assert extracted.suffix == expected
|