1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69
|
"""Test ability to run in parallel with shared cache."""
from __future__ import annotations
import os
from multiprocessing import Pool
from pathlib import Path
import pytest
import responses
from tldextract import TLDExtract
from tldextract.tldextract import PUBLIC_SUFFIX_LIST_URLS
def test_multiprocessing_makes_one_request(tmp_path: Path) -> None:
    """Check that workers sharing one cache directory download only once.

    Three worker processes each run ``_run_extractor`` against the same
    ``tmp_path``. Every worker reports how many HTTP requests its own mock
    recorded, and those reports must add up to exactly one.
    """
    worker_total = 3
    # Every worker receives the very same directory so they contend for one cache.
    shared_dirs = [tmp_path for _ in range(worker_total)]
    with Pool(processes=worker_total) as workers:
        per_worker_requests = workers.map(_run_extractor, shared_dirs)
    total_requests = sum(per_worker_requests)
    assert total_requests == 1
def _run_extractor(cache_dir: Path) -> int:
    """Run one extraction against a mocked suffix-list URL and count requests.

    Args:
        cache_dir: Directory handed to ``TLDExtract`` as its ``cache_dir``.

    Returns:
        The number of HTTP calls recorded by this function's own
        ``responses.RequestsMock``.
    """
    suffix_list_url = PUBLIC_SUFFIX_LIST_URLS[0]
    with responses.RequestsMock(assert_all_requests_are_fired=False) as mock_http:
        mock_http.add(responses.GET, suffix_list_url, status=208, body="uk.co")
        extractor = TLDExtract(cache_dir=str(cache_dir))
        extractor("bar.uk.com", include_psl_private_domains=True)
        # Read the call log while the mock is still active.
        return len(mock_http.calls)
@responses.activate
def test_cache_cleared_by_other_process(
    tmp_path: Path, monkeypatch: pytest.MonkeyPatch
) -> None:
    """Exercise a cache update when files vanish just before being unlinked.

    A first extraction is run with the real ``os.unlink``. Then ``os.unlink``
    is replaced by a wrapper that, for paths inside the cache directory,
    removes the file once on behalf of an imaginary rival process before
    performing the caller's own removal. The test has no explicit assertion:
    it passes as long as ``extract.update(fetch_now=True)`` does not raise.
    """
    responses.add(responses.GET, PUBLIC_SUFFIX_LIST_URLS[0], status=208, body="uk.com")

    cache_dir = str(tmp_path)
    extract = TLDExtract(cache_dir=cache_dir)
    extract("google.com")

    # Capture the genuine implementation before it gets patched below.
    real_unlink = os.unlink

    def lies_within(path: Path, other_path: str | Path) -> bool:
        """Report whether ``path`` can be expressed relative to ``other_path``.

        Mirrors ``PurePath.is_relative_to`` from the Python 3.9 standard library.
        Reference: https://docs.python.org/3/library/pathlib.html#pathlib.PurePath.is_relative_to
        """
        try:
            path.relative_to(other_path)
        except ValueError:
            return False
        return True

    def racing_unlink(filename: str | Path) -> None:
        """Unlink ``filename``, first deleting it once if it is a cache file."""
        if isinstance(filename, str):
            in_cache = filename.startswith(cache_dir)
        elif isinstance(filename, Path):
            in_cache = lies_within(filename, cache_dir)
        else:
            in_cache = False

        if in_cache:
            # The "other process" wins the race and removes the file first.
            real_unlink(filename)
        # The caller's own removal, attempted unconditionally.
        real_unlink(filename)

    monkeypatch.setattr(os, "unlink", racing_unlink)

    extract.update(fetch_now=True)
|