1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97
|
from __future__ import annotations
from typing import TYPE_CHECKING
import pytest
from parsel.utils import extract_regex, shorten
if TYPE_CHECKING:
from re import Pattern
@pytest.mark.parametrize(
("text", "width", "suffix", "expected"),
[
("foobar", -1, "...", ValueError),
("foobar", 0, "...", ""),
("foobar", 1, "...", "."),
("foobar", 2, "...", ".."),
("foobar", 3, "...", "..."),
("foobar", 4, "...", "f..."),
("foobar", 5, "...", "fo..."),
("foobar", 6, "...", "foobar"),
("foobar", 7, "...", "foobar"),
("hello", 3, "…", "he…"),
("hello", 4, "…", "hel…"),
("test", 2, "->", "->"),
("test", 3, "->", "t->"),
("test", 4, "->", "test"),
("", 0, "...", ""),
("", 3, "...", ""),
],
)
def test_shorten(
text: str, width: int, suffix: str, expected: str | type[Exception]
) -> None:
if isinstance(expected, str):
assert shorten(text, width, suffix=suffix) == expected
else:
with pytest.raises(expected):
shorten(text, width, suffix=suffix)
@pytest.mark.parametrize(
("regex", "text", "replace_entities", "expected"),
[
(
r"(?P<month>\w+)\s*(?P<day>\d+)\s*\,?\s*(?P<year>\d+)",
"October 25, 2019",
True,
["October", "25", "2019"],
),
(
r"(?P<month>\w+)\s*(?P<day>\d+)\s*\,?\s*(?P<year>\d+)",
"October 25 2019",
True,
["October", "25", "2019"],
),
(
r"(?P<extract>\w+)\s*(?P<day>\d+)\s*\,?\s*(?P<year>\d+)",
"October 25 2019",
True,
["October"],
),
(
r"\w+\s*\d+\s*\,?\s*\d+",
"October 25 2019",
True,
["October 25 2019"],
),
(
r"^.*$",
""sometext" & "moretext"",
True,
['"sometext" & "moretext"'],
),
(
r"^.*$",
""sometext" & "moretext"",
False,
[""sometext" & "moretext""],
),
(
r"(?P<extract>\d+)",
"no digits here",
True,
[],
),
],
)
def test_extract_regex(
regex: str | Pattern[str],
text: str,
replace_entities: bool,
expected: list[str],
) -> None:
assert extract_regex(regex, text, replace_entities) == expected
|