1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50
|
import pytest
from mdurl import encode
@pytest.mark.parametrize(
"input_,expected",
[
pytest.param("%%%", "%25%25%25", id="should encode percent"),
pytest.param("\r\n", "%0D%0A", id="should encode control chars"),
pytest.param("?#", "?#", id="should not encode parts of an url"),
pytest.param("[]^", "%5B%5D%5E", id="should not encode []^ - commonmark tests"),
pytest.param("my url", "my%20url", id="should encode spaces"),
pytest.param("φου", "%CF%86%CE%BF%CF%85", id="should encode unicode"),
pytest.param(
"%FG", "%25FG", id="should encode % if it doesn't start a valid escape seq"
),
pytest.param(
"%00%FF", "%00%FF", id="should preserve non-utf8 encoded characters"
),
pytest.param(
"\x00\x7F\x80",
"%00%7F%C2%80",
id="should encode characters on the cache borders",
), # protects against off-by-one in cache implementation
],
)
def test_encode(input_, expected):
assert encode(input_) == expected
def test_encode_arguments():
assert encode("!@#$", exclude="@$") == "%21@%23$"
assert encode("%20%2G", keep_escaped=True) == "%20%252G"
assert encode("%20%2G", keep_escaped=False) == "%2520%252G"
assert encode("!@%25", exclude="@", keep_escaped=False) == "%21@%2525"
def test_encode_surrogates():
# bad surrogates (high)
assert encode("\uD800foo") == "%EF%BF%BDfoo"
assert encode("foo\uD800") == "foo%EF%BF%BD"
# bad surrogates (low)
assert encode("\uDD00foo") == "%EF%BF%BDfoo"
assert encode("foo\uDD00") == "foo%EF%BF%BD"
# valid one
# (the codepoint is "D800 DD00" in UTF-16BE)
assert encode("𐄀") == "%F0%90%84%80"
|