File: test_cleaning.py

package info (click to toggle)
normality 3.0.2-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 180 kB
  • sloc: python: 1,275; makefile: 17
file content (39 lines) | stat: -rw-r--r-- 1,599 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
from normality.cleaning import remove_unsafe_chars, collapse_spaces, squash_spaces


def test_remove_unsafe_chars():
    """Run remove_unsafe_chars over a table of (input, expected) pairs.

    The table pins the behaviour this test relies on: ``None`` and the
    empty string both yield ``""``, an ordinary space is left alone,
    U+2028 comes back as a space, and U+FEFF / U+200B are dropped.
    """
    expectations = [
        (None, ""),
        ("", ""),
        (" ", " "),
        # U+2028 followed by a space is expected to yield two spaces.
        ("\u2028 ", "  "),
        # U+FEFF is expected to vanish, leaving only the trailing space.
        ("\ufeff ", " "),
        ("lalala\ufeff ", "lalala "),
        # U+200B is expected to vanish from the middle of a word.
        ("lalala\u200bx", "lalalax"),
    ]
    for raw, cleaned in expectations:
        # None is passed on purpose, hence the type-checker suppression.
        assert remove_unsafe_chars(raw) == cleaned  # type: ignore


def test_collapse_spaces():
    """Run collapse_spaces over a table of (input, expected) pairs.

    An expected value of ``None`` means the call must return ``None``
    itself (identity check); any other expected value is compared with
    ``==``. The table covers ``None``, empty and whitespace-only inputs,
    and inputs where surrounding whitespace is trimmed and inner runs of
    whitespace become a single space.
    """
    expectations = [
        (None, None),
        ("", None),
        (" ", None),
        ("  ", None),
        ("\xa0", None),
        (" \n ", None),
        (" \n\n ", None),
        (" \njfshdhdfjk\n ", "jfshdhdfjk"),
        (" \njfshd\t\thdfjk\n ", "jfshd hdfjk"),
        # Only whitespace-like code points: U+2028, U+2029, U+3000.
        (" \n\u2028\u2029\u3000\n ", None),
        # U+3000 between letters is expected to become a plain space.
        ("a\u3000x", "a x"),
    ]
    for raw, wanted in expectations:
        # None is passed on purpose, hence the type-checker suppression.
        outcome = collapse_spaces(raw)  # type: ignore
        if wanted is None:
            assert outcome is None
        else:
            assert outcome == wanted


def test_squash_spaces():
    """Run squash_spaces over a table of (input, expected) pairs.

    The table pins the behaviour this test relies on: empty and
    whitespace-only inputs yield ``""``, surrounding whitespace is
    trimmed, inner whitespace runs become one space, and U+200B is
    dropped without leaving a space behind.
    """
    expectations = [
        ("", ""),
        (" ", ""),
        ("  ", ""),
        ("\xa0", ""),
        (" \n ", ""),
        (" \n\n ", ""),
        (" \njfshdhdfjk\n ", "jfshdhdfjk"),
        (" \njfshd\t\thdfjk\n ", "jfshd hdfjk"),
        # Mix of U+2028, U+2029, U+200B and U+200C between newlines.
        (" \n\u2028\u2029\u200b\u200c\n ", ""),
        # U+3000 between letters is expected to become a plain space.
        ("a\u3000x", "a x"),
        # U+200B between letters is expected to be removed outright.
        ("a\u200bx", "ax"),
    ]
    for raw, wanted in expectations:
        assert squash_spaces(raw) == wanted