File: unicode_identifiers_normalization.srctree

package info (click to toggle)
cython 3.0.11%2Bdfsg-2
  • links: PTS, VCS
  • area: main
  • in suites: trixie
  • size: 19,092 kB
  • sloc: python: 83,539; ansic: 18,831; cpp: 1,402; xml: 1,031; javascript: 511; makefile: 403; sh: 204; sed: 11
file content (83 lines) | stat: -rw-r--r-- 2,261 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
# -*- coding: utf-8 -*-
# mode: run
# tag: pure3.0, pep3131

# Generate the test modules test0.py, test1.py and test2.py.
PYTHON build_tests.py
# show behaviour in Python mode: run the doctests of each generated module
# before anything has been compiled
PYTHON -m doctest test0.py
PYTHON -m doctest test1.py
PYTHON -m doctest test2.py

# Compile every module matching "test*.py" (see setup.py) in place.
PYTHON setup.py build_ext --inplace
# test in Cython mode: import each module and run its doctests again.
# doctest.testmod() returns (failed, attempted), so the exit status of each
# command is the number of failed doctests (0 means success).
# NOTE(review): presumably the import now resolves to the compiled extension
# module rather than the .py source -- confirm against the test runner.
PYTHON -c "import doctest; import test0 as m; exit(doctest.testmod(m)[0])"
PYTHON -c "import doctest; import test1 as m; exit(doctest.testmod(m)[0])"
PYTHON -c "import doctest; import test2 as m; exit(doctest.testmod(m)[0])"

########## setup.py #########

# Build script for the generated test modules.
from distutils.core import setup
from Cython.Build.Dependencies import cythonize

# Turn every file matching "test*.py" in the working directory into an
# extension module description, then hand the result to setup().
extension_modules = cythonize("test*.py")

setup(ext_modules=extension_modules)

######### build_tests.py ########
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
import sys
import unicodedata

# A few pairs of unicode strings that differ as written but are equivalent
# after NFKC normalization (the form PEP 3131 prescribes for identifiers).
# The ligature is written as an escape sequence on purpose: a literal
# ligature character is easily folded into plain ASCII "fi" by editors or
# text-processing tools, which would make both members of the pair identical
# and break the sanity check below.
string_pairs = [
    # U+FB01 LATIN SMALL LIGATURE FI versus the two separate letters
    ("\ufb01", "fi"),
    # a with acute accent: combining character versus single precomposed char
    ("a\u0301", "\u00e1"),
    # alpha with a pair of combining characters in a different order.
    # No single character to normalize to
    ("α\u0334\u0362", "α\u0362\u0334"),
]

# Show that the pairs genuinely aren't equal before normalization, and that
# they do become equal once normalized.
for first, second in string_pairs:
    assert first != second, (
        "pair %r / %r is already identical before normalization"
        % (first, second))
    assert (unicodedata.normalize('NFKC', first)
            == unicodedata.normalize('NFKC', second)), (
        "pair %r / %r is not NFKC-equivalent" % (first, second))

# Some code that accesses the identifiers through the two different names.
# Each template contains doctests; {0} and {1} are replaced by the two
# spellings of one identifier.
example_code = [
"""
class C:
    '''
    >>> C().get()
    True
    '''
    def __init__(self):
        self.{0} = True
    def get(self):
        return self.{1}
""", """
def pass_through({0}):
    '''
    >>> pass_through(True)
    True
    '''
    return {1}
""", """
import cython
{0} = True
def test():
    '''
    >>> test()
    True
    '''
    return {1}
"""]

# io.open accepts the ``encoding`` keyword on Python 2 as well.
from io import open

# Write one module per (template, identifier pair) combination:
# test0.py, test1.py, test2.py.
for idx, (template, names) in enumerate(zip(example_code, string_pairs)):
    source = template.format(*names)
    with open("test{0}.py".format(idx), "w", encoding="utf8") as f:
        f.write("# -*- coding: utf-8 -*-\n")
        # The code isn't Py2 compatible. Only write actual code in Py3+.
        if sys.version_info[0] > 2:
            f.write(source)