File: test_hyp.py

package info (click to toggle)
pypy3 7.3.19+dfsg-2
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 212,236 kB
  • sloc: python: 2,098,316; ansic: 540,565; sh: 21,462; asm: 14,419; cpp: 4,451; makefile: 4,209; objc: 761; xml: 530; exp: 499; javascript: 314; pascal: 244; lisp: 45; csh: 12; awk: 4
file content (57 lines) | stat: -rw-r--r-- 1,849 bytes parent folder | download | duplicates (4)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
import sys
import pytest
# importorskip() skips this whole module when hypothesis is not installed.
# A bare pytest.skip() call at import time is reported as a collection error
# by newer pytest releases unless allow_module_level=True is passed, and that
# keyword is not accepted by older releases.
pytest.importorskip("hypothesis")
from hypothesis import given, strategies as st, example, settings, assume

from pypy.module.unicodedata.interp_ucd import ucd
from rpython.rlib.rutf8 import codepoints_in_utf8

def make_normalization(space, NF_code):
    """Return a function that normalizes a unicode string to form NF_code.

    The returned callable encodes its argument as UTF-8, wraps it with
    space.newutf8(), runs ucd.normalize() with NF_code on the wrapped value
    and returns the unwrapped result decoded from UTF-8.
    """
    def normalize(s):
        utf8 = s.encode('utf8')
        # newutf8() is given the encoded bytes plus their code point count.
        w_input = space.newutf8(utf8, codepoints_in_utf8(utf8))
        w_output = ucd.normalize(space, NF_code, w_input)
        utf8_result = space.utf8_w(w_output)
        return utf8_result.decode('utf8')
    return normalize

all_forms = ['NFC', 'NFD', 'NFKC', 'NFKD']


def _build_compositions(forms):
    """Return every (n1, n2, n3) triple over forms, ordered by n1 then n2.

    n3 is the single form expected to give the same result as applying n1
    and then n2: it carries the 'K' marker as soon as either step does, and
    takes its final letter (C or D) from the second step.
    """
    table = []
    for first in forms:
        for second in forms:
            compat = 'K' if 'K' in first or 'K' in second else ''
            table.append((first, second, 'NF' + compat + second[-1]))
    return table


# For every (n1, n2, n3) triple, normalizing with n1 and then with n2 must
# give the same result as normalizing with n3 alone.
# Reference: http://unicode.org/reports/tr15/#Design_Goals
compositions = _build_compositions(all_forms)


@pytest.mark.parametrize('NF1, NF2, NF3', compositions)
@example(s=u"\u0128")
@example(s=u'---\uafb8\u11a7---')  # issue 2289
@settings(max_examples=1000)
@given(s=st.text())
def test_composition(s, space, NF1, NF2, NF3):
    """Normalizing with NF1 and then NF2 must equal normalizing with NF3."""
    # chr(0xfacf) normalizes to chr(0x2284a), which is too big when
    # sys.maxunicode is 65535, so that exact input is rejected there.
    narrow_build = sys.maxunicode == 65535
    assume(not (narrow_build and s == u'\ufacf'))
    first = make_normalization(space, NF1)
    second = make_normalization(space, NF2)
    combined = make_normalization(space, NF3)
    assert second(first(s)) == combined(s)

if sys.maxunicode != 65535:
    # The u'\ufacf' example is only registered when sys.maxunicode is not
    # 65535.  A decorator cannot be applied conditionally, so example() is
    # called by hand on the already decorated test function.
    add_facf_example = example(s=u'\ufacf')
    test_composition = add_facf_example(test_composition)