File: test_ucd.py

package info (click to toggle)
pypy 7.3.3%2Bdfsg-2
  • links: PTS, VCS
  • area: main
  • in suites: bullseye
  • size: 113,660 kB
  • sloc: python: 1,419,707; ansic: 64,313; cpp: 3,290; sh: 2,763; makefile: 540; xml: 256; asm: 213; lisp: 45; awk: 4
file content (52 lines) | stat: -rw-r--r-- 1,561 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
import pytest
from rpython.rlib.runicode import code_to_unichr, MAXUNICODE
from rpython.rlib.unicodedata import unicodedb_5_2_0, unicodedb_11_0_0
from rpython.rtyper.test.tool import BaseRtypingTest
from rpython.translator.c.test.test_genc import compile


class TestTranslated(BaseRtypingTest):
    """Compare a unicodedb name lookup run via ``interpret`` with a direct call."""

    def test_translated(self):
        """``unicodedb_5_2_0.lookup`` must agree interpreted and uninterpreted."""
        def lookup_faihu(flag):
            # NOTE(review): the ``flag == 0`` branch is never taken by this
            # test; presumably it keeps the function from being a constant --
            # confirm before removing it.
            if flag == 0:
                return -1
            return unicodedb_5_2_0.lookup("GOTHIC LETTER FAIHU")

        res = self.interpret(lookup_faihu, [1])
        print(hex(res))
        assert res == lookup_faihu(1)


def test_code_to_unichr():
    """Check the first unit returned by a compiled ``code_to_unichr(0x12346)``."""
    def first_unit(code):
        return ord(code_to_unichr(code)[0])

    compiled = compile(first_unit, [int])
    result = compiled(0x12346)
    # With MAXUNICODE == 65535 the first element is the first char of a
    # pair (0xd808); otherwise it is the code point itself.
    expected = 0xd808 if MAXUNICODE == 65535 else 0x12346
    assert result == expected

def test_cjk():
    """Check ``unicodedb_11_0_0.lookup`` on CJK UNIFIED IDEOGRAPH names.

    For every (first, last) range below, names built from code points at
    and just inside both ends must resolve to that code point, while the
    code points immediately outside must raise ``KeyError``.
    """
    name_format = 'CJK UNIFIED IDEOGRAPH-%X'
    ranges = (
        (0x3400, 0x4DB5),
        (0x4E00, 0x9FEF),
        (0x20000, 0x2A6D6),
        (0x2A700, 0x2B734),
        (0x2B740, 0x2B81D),
        (0x2B820, 0x2CEA1),
    )
    for low, high in ranges:
        # At and just inside each boundary: the name resolves to the code.
        for code in (low, low + 1, high - 1, high):
            assert unicodedb_11_0_0.lookup(name_format % code) == code
        # One step outside each boundary: the name is unknown.
        for code in (low - 1, high + 1):
            with pytest.raises(KeyError):
                unicodedb_11_0_0.lookup(name_format % code)