File: test_normality.py

package info (click to toggle)
normality 3.0.2-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 180 kB
  • sloc: python: 1,275; makefile: 17
file content (114 lines) | stat: -rw-r--r-- 3,158 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
from datetime import datetime, UTC

from normality import normalize, latinize_text, ascii_text
from normality import (
    stringify,
    slugify,
    guess_encoding,
    guess_file_encoding,
    predict_file_encoding,
    predict_encoding,
)


def test_empty():
    """Check how each helper treats ``None`` and blank strings.

    The assertions expect ``slugify`` and ``normalize`` to give ``None``
    and ``ascii_text`` / ``latinize_text`` to give the empty string.
    """
    assert slugify(None) is None
    for blank in (None, ""):
        assert ascii_text(blank) == ""  # type: ignore
    # latinize_text returns empty string for None
    assert latinize_text(None) == ""
    # Whitespace-only input is expected to count as empty as well.
    for blank in (None, "", " "):
        assert normalize(blank) is None


def test_petro():
    """Run one Ukrainian Cyrillic name through all four text transforms."""
    name = "Порошенко Петро Олексійович"

    slug = slugify(name)
    assert slug == "porosenko-petro-oleksijovic"

    plain = ascii_text(name)
    assert plain == "Porosenko Petro Oleksijovic"

    # Latinization is expected to keep diacritics that ascii_text drops.
    latin = latinize_text(name)
    assert latin == "Porošenko Petro Oleksíjovič"

    # normalize is expected to lowercase but stay in Cyrillic script.
    normal = normalize(name)
    assert normal == "порошенко петро олексіиович"


def test_ahmad():
    """Expect the schwa letter to transliterate to a plain ``a``."""
    assert ascii_text("əhməd") == "ahmad"


def test_azeri():
    """Expect an upper-case Azeri name to map to upper-case ASCII."""
    full_name = "FUAD ALIYEV ƏHMƏD OĞLU"
    transliterated = ascii_text(full_name)
    assert transliterated == "FUAD ALIYEV AHMAD OGLU"


def test_slugify():
    """Exercise ``slugify`` with an explicit ``-`` separator.

    The first table lists inputs that should produce a slug; the second
    lists inputs for which ``None`` is expected.
    """
    sluggable = (
        ("BABY! camel-is good", "baby-camel-is-good"),
        ("testʼs", "tests"),
        ("test_s", "test-s"),
    )
    for raw, expected in sluggable:
        assert slugify(raw, sep="-") == expected

    # Inputs made up only of separators/whitespace, empty input, and None.
    for raw in ("-", "", "- -", None):
        assert slugify(raw, sep="-") is None


def test_georgian():
    """Expect Georgian script to transliterate to lower-case ASCII."""
    assert ascii_text("ავლაბრის ფონდი") == "avlabris pondi"


def test_german():
    """Expect the umlaut to lose its dots and the sharp s to become ``ss``."""
    phrase = "Häschen Spaß"

    plain = ascii_text(phrase)
    assert plain == "Haschen Spass"

    slug = slugify(phrase, sep="-")
    assert slug == "haschen-spass"


def test_stringify():
    """Expect ``stringify`` to trim a padded string and render numbers as text."""
    samples = (
        (" . ", "."),
        (5, "5"),
        (0.5, "0.5"),
    )
    for value, rendered in samples:
        assert stringify(value) == rendered


def test_stringify_datetime():
    """Expect a timezone-aware datetime to stringify to text led by its year."""
    now = datetime.now(UTC)
    rendered = stringify(now)
    assert rendered is not None
    year_prefix = "%s-" % now.year
    # The rendered text doubles as the failure message for easier debugging.
    assert rendered.startswith(year_prefix), rendered


def test_guess_encoding():
    """Expect ISO-8859-5 encoded Cyrillic bytes to be guessed as ``iso8859-5``."""
    raw = "Порошенко Петро Олексійович".encode("iso-8859-5")
    assert guess_encoding(raw) == "iso8859-5"


def test_predict_encoding():
    """Expect ISO-8859-5 encoded Cyrillic bytes to be predicted as ``iso8859-5``."""
    raw = "Порошенко Петро Олексійович".encode("iso-8859-5")
    assert predict_encoding(raw) == "iso8859-5"


def test_guess_file_encoding():
    """Expect the UTF-16 fixture file to be guessed as ``utf-16``."""
    # The fixture path is relative to the working directory; the handle is
    # opened in binary mode so the detector sees raw bytes.
    with open("tests/fixtures/utf-16.txt", "rb") as fixture:
        detected = guess_file_encoding(fixture)
    assert detected == "utf-16"


def test_predict_file_encoding():
    """Expect the UTF-16 fixture file to be predicted as ``utf-16``."""
    # The fixture path is relative to the working directory; the handle is
    # opened in binary mode so the detector sees raw bytes.
    with open("tests/fixtures/utf-16.txt", "rb") as fixture:
        detected = predict_file_encoding(fixture)
    assert detected == "utf-16"


def test_petro_iso_encoded():
    """Expect ``stringify`` to turn ISO-8859-5 bytes back into the original text."""
    original = "Порошенко Петро Олексійович"
    assert stringify(original.encode("iso8859-5")) == original


def test_petro_utf16_encoded():
    """Expect ``stringify`` to turn UTF-16 bytes back into the original text."""
    original = "Порошенко Петро Олексійович"
    assert stringify(original.encode("utf-16")) == original