File: test_bugs.py

package info (click to toggle)
python-mutf8 1.0.6-3
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 124 kB
  • sloc: python: 342; ansic: 207; makefile: 3
file content (34 lines) | stat: -rw-r--r-- 1,178 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
def test_issue_1(encoder, decoder):
    """
    Ensure we do not regress on https://github.com/TkTech/mutf8/issues/1.

    Two issues found here:

        1. Python-based decoder could hit an infinite loop, since it didn't
           *always* increment s_ix on an iteration.
        2. C & Python decoders would incorrectly decode the `sample` below,
           because the logic for surrogate pair decoding made the incorrect
           assumption that we could short-circuit when b1 == 0xED.
    """

    # b'[\xea\xb0\x80 \xeb\x82\x98 \xeb\x8b\xa4 \xeb\x9d\xbc \xeb\xa7\x88
    # \xeb\xb0\x94  \xec\x82\xac  \xec\x95\x84\xec\x9e\x90  \xec\xb0\xa8
    # \xec\xb9\xb4 \xed\x83\x80 \xed\x8c\x8c \xed\x95\x98]'
    sample = (
        u'[\uAC00 \uB098 \uB2E4 \uB77C \uB9C8 \uBC14  \uC0AC  \uC544'
        u'\uC790  \uCC28 \uCE74 \uD0C0 \uD30C \uD558]'
    )

    encoded = encoder(sample)
    decoded = decoder(encoded)

    assert sample == decoded


def test_issue_3(encoder, decoder):
    """
    Underallocation due to an incorrect assumption on the maximum expansion
    of an encoded string.
    """
    str = '黑人抬棺組裝包'
    assert decoder(encoder(str)) == str