File: test_encodings.py

package info (click to toggle)

python-ftfy 6.3.1-1

links: PTS, VCS
area: main
in suites: forky, sid
size: 808 kB
sloc: python: 1,716; makefile: 148

file content (20 lines) | stat: -rw-r--r-- 699 bytes

from ftfy import bad_codecs, guess_bytes


def test_cesu8():
    """Check the CESU-8 codec aliases and decode a known CESU-8 sample.

    Both spellings of the codec name, "cesu8" and "cesu-8", must resolve
    through ``bad_codecs.search_function`` to objects of the same class, and
    a byte string containing a surrogate pair written as two 3-byte sequences
    plus the two-byte ``\\xc0\\x80`` form of NUL must decode to the expected
    text.
    """
    # NOTE(review): only the classes of the two lookup results are compared,
    # so this would also pass if both lookups returned None -- confirm
    # whether a stricter check is wanted.
    alias_classes = [
        bad_codecs.search_function(alias).__class__
        for alias in ("cesu8", "cesu-8")
    ]
    assert alias_classes[0] == alias_classes[1]

    # b"\xed\xa6\x9d\xed\xbd\xb7" is the surrogate pair D99D/DF77, i.e. U+77777.
    encoded = b"\xed\xa6\x9d\xed\xbd\xb7 is an unassigned character, and \xc0\x80 is null"
    expected = "\U00077777 is an unassigned character, and \x00 is null"
    decoded = encoded.decode("cesu8")
    assert decoded == expected


def test_russian_crash():
    """Regression check: non-UTF-8 bytes must not crash the decoders.

    The sample is a run of high bytes followed by a space (presumably
    Windows-1251 encoded Russian text -- TODO confirm). Neither call's result
    is inspected; the test passes as long as no exception is raised.
    """
    sample = b"\xe8\xed\xe2\xe5\xed\xf2\xe0\xf0\xe8\xe7\xe0\xf6\xe8\xff "

    # The decoded output is irrelevant here; only the absence of an
    # exception matters.
    sample.decode("utf-8-variants", "replace")

    # Guessing the encoding of the same bytes must not raise either.
    guess_bytes(sample)