# Tests for the unsafe-character and whitespace helpers in normality.cleaning.
from normality.cleaning import remove_unsafe_chars, collapse_spaces, squash_spaces
def test_remove_unsafe_chars():
    """Exercise remove_unsafe_chars on None, blank and contaminated strings.

    The expectations below show U+2028, U+FEFF and U+200B being dropped
    while ordinary spaces and letters are kept as they are.
    """
    # None is passed on purpose; the suppression silences the static type
    # checker for this one call.
    assert remove_unsafe_chars(None) == ""  # type: ignore

    # (input, expected output) pairs, checked in order.
    expectations = (
        ("", ""),
        (" ", " "),
        ("\u2028 ", " "),
        ("\ufeff ", " "),
        ("lalala\ufeff ", "lalala "),
        ("lalala\u200bx", "lalalax"),
    )
    for raw, cleaned in expectations:
        assert remove_unsafe_chars(raw) == cleaned
def test_collapse_spaces():
    """Exercise collapse_spaces on None, whitespace-only and mixed strings.

    Per the expectations below, inputs consisting only of whitespace-like
    characters yield None, while text keeps its words joined by single
    ASCII spaces with the surrounding whitespace removed.
    """
    # None is passed on purpose; the suppression silences the static type
    # checker for this one call.
    assert collapse_spaces(None) is None  # type: ignore

    # (input, expected) pairs, checked in order. An expected value of None
    # is verified by identity, anything else by equality.
    # NOTE(review): the two single-space entries are identical; one of them
    # may originally have held several spaces - confirm against upstream.
    table = [
        ("", None),
        (" ", None),
        (" ", None),
        ("\xa0", None),
        (" \n ", None),
        (" \n\n ", None),
        (" \njfshdhdfjk\n ", "jfshdhdfjk"),
        (" \njfshd\t\thdfjk\n ", "jfshd hdfjk"),
        (" \n\u2028\u2029\u3000\n ", None),
        ("a\u3000x", "a x"),
    ]
    for text, expected in table:
        outcome = collapse_spaces(text)
        if expected is None:
            assert outcome is None
        else:
            assert outcome == expected
def test_squash_spaces():
    """Exercise squash_spaces on blank, whitespace-only and mixed strings.

    Per the expectations below, whitespace-only input yields the empty
    string, U+3000 between letters becomes a single ASCII space, and
    U+200B between letters is removed entirely.
    """
    # (input, expected output) pairs, checked in order.
    # NOTE(review): the two single-space entries are identical; one of them
    # may originally have held several spaces - confirm against upstream.
    scenarios = [
        ("", ""),
        (" ", ""),
        (" ", ""),
        ("\xa0", ""),
        (" \n ", ""),
        (" \n\n ", ""),
        (" \njfshdhdfjk\n ", "jfshdhdfjk"),
        (" \njfshd\t\thdfjk\n ", "jfshd hdfjk"),
        (" \n\u2028\u2029\u200b\u200c\n ", ""),
        ("a\u3000x", "a x"),
        ("a\u200bx", "ax"),
    ]
    for given, wanted in scenarios:
        assert squash_spaces(given) == wanted
|