# Tests for the parsel.utils helpers ``extract_regex`` and ``shorten``.
from typing import List, Pattern, Type, Union
from pytest import mark, raises
from parsel.utils import extract_regex, shorten
@mark.parametrize(
    "width,expected",
    (
        (-1, ValueError),
        (0, ""),
        (1, "."),
        (2, ".."),
        (3, "..."),
        (4, "f..."),
        (5, "fo..."),
        (6, "foobar"),
        (7, "foobar"),
    ),
)
def test_shorten(width: int, expected: Union[str, Type[Exception]]) -> None:
    """Check ``shorten("foobar", width)`` for each parametrized width.

    ``expected`` is either the exact string ``shorten`` must return, or an
    exception class that the call must raise (the negative-width case).
    The table covers widths below, at, and above the length of the input.
    """
    # Exception case first: anything that is not a string is an exception
    # class the call is required to raise.
    if not isinstance(expected, str):
        with raises(expected):
            shorten("foobar", width)
        return

    shortened = shorten("foobar", width)
    assert shortened == expected
@mark.parametrize(
    "regex, text, replace_entities, expected",
    (
        # Several named groups: every group is returned, in order.
        [
            r"(?P<month>\w+)\s*(?P<day>\d+)\s*\,?\s*(?P<year>\d+)",
            "October 25, 2019",
            True,
            ["October", "25", "2019"],
        ],
        # Same pattern, input without the optional comma.
        [
            r"(?P<month>\w+)\s*(?P<day>\d+)\s*\,?\s*(?P<year>\d+)",
            "October 25 2019",
            True,
            ["October", "25", "2019"],
        ],
        # A group named "extract": only that group is returned.
        [
            r"(?P<extract>\w+)\s*(?P<day>\d+)\s*\,?\s*(?P<year>\d+)",
            "October 25 2019",
            True,
            ["October"],
        ],
        # No groups at all: the whole match is returned.
        [
            r"\w+\s*\d+\s*\,?\s*\d+",
            "October 25 2019",
            True,
            ["October 25 2019"],
        ],
        # Entity replacement enabled: &quot; is decoded.
        # NOTE(review): parsel documents that &amp; and &lt; are kept even
        # when replace_entities is true, hence "&amp;" in the expected value;
        # confirm against parsel.utils.extract_regex.
        [
            r"^.*$",
            "&quot;sometext&quot; &amp; &quot;moretext&quot;",
            True,
            ['"sometext" &amp; "moretext"'],
        ],
        # Entity replacement disabled: the text comes back untouched.
        [
            r"^.*$",
            "&quot;sometext&quot; &amp; &quot;moretext&quot;",
            False,
            ["&quot;sometext&quot; &amp; &quot;moretext&quot;"],
        ],
    ),
)
def test_extract_regex(
    regex: Union[str, Pattern[str]],
    text: str,
    replace_entities: bool,
    expected: List[str],
) -> None:
    """Check ``extract_regex`` against each parametrized case.

    Args:
        regex: Pattern (string or compiled) handed to ``extract_regex``.
        text: Input text the pattern is applied to.
        replace_entities: Forwarded to ``extract_regex`` as its third
            positional argument; toggles character-entity replacement.
        expected: Exact list of strings ``extract_regex`` must return.
    """
    assert extract_regex(regex, text, replace_entities) == expected
|