File: test_utf8.py

package info (click to toggle)
python-skytools 3.9.2-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 620 kB
  • sloc: python: 6,394; ansic: 929; makefile: 38; awk: 14
file content (23 lines) | stat: -rw-r--r-- 809 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23

import pytest

from skytools.utf8 import safe_utf8_decode, sanitize_unicode


def test_safe_decode() -> None:
    """Exercise safe_utf8_decode() and sanitize_unicode() on sample inputs."""
    # Each entry pairs raw bytes with the (flag, text) tuple expected back.
    decode_cases = [
        (b"foobar", (True, "foobar")),
        (b'X\0Z', (False, "X\uFFFDZ")),
        (b"OK", (True, "OK")),
        (b'X\xF1Y', (False, "X\uFFFDY")),
    ]
    for raw, expected in decode_cases:
        assert safe_utf8_decode(raw) == expected

    # A surrogate pair is expected to come back as a single code point.
    merged = sanitize_unicode(u'\uD801\uDC01')
    assert merged == "\U00010401"

    # Passing bytes instead of str must raise TypeError.
    with pytest.raises(TypeError):
        sanitize_unicode(b'qwe')    # type: ignore[arg-type]

## These examples give different results in py27 and py35, so they are kept commented out:
# >>> _norm(safe_utf8_decode(b'X\xed\xa0\x80Y\xed\xb0\x89Z'))
# (False, ['X', 65533, 65533, 65533, 'Y', 65533, 65533, 65533, 'Z'])
# >>> _norm(safe_utf8_decode(b'X\xed\xa0\x80\xed\xb0\x89Z'))
# (False, ['X', 65533, 65533, 65533, 65533, 65533, 65533, 'Z'])