#!/usr/bin/env python
# coding: utf-8
"""
Manual tests comparing wcwidth.py to libc's wcwidth(3) and wcswidth(3).
https://github.com/jquast/wcwidth
This suite of tests compares the libc return values with the pure-python return
values. Although wcwidth(3) is POSIX, its actual implementation may differ,
so these tests are not guaranteed to be successful on all platforms, especially
where wcwidth(3)/wcswidth(3) is out of date. This is the case on many
platforms, which usually conform only to Unicode specification 1.0 or 2.0.
This program accepts one optional command-line argument, the unicode version
level for our library to use when comparing to libc.
"""
# pylint: disable=C0103
# Invalid module name "wcwidth-libc-comparator"
# standard imports
from __future__ import print_function
# std imports
import sys
import locale
import warnings
import ctypes.util
import unicodedata
# local
# local imports
import wcwidth
def is_named(ucs):
    """
    Tell whether the unicode point ``ucs`` has a name.

    :param ucs: a single unicode character.
    :rtype: bool
    :returns: ``True`` when ``unicodedata.name`` finds a name for ``ucs``,
        ``False`` when it has none.
    """
    # The empty-string default stands in for the ValueError that
    # unicodedata.name() raises for a character without a name.
    return bool(unicodedata.name(ucs, u''))
def is_not_combining(ucs):
    """
    Tell whether the unicode point ``ucs`` is not a combining character.

    :param ucs: a single unicode character.
    :rtype: bool
    :returns: ``True`` when ``unicodedata.combining`` reports class 0
        for ``ucs``, ``False`` for any other combining class.
    """
    return unicodedata.combining(ucs) == 0
def report_ucs_msg(ucs, wcwidth_libc, wcwidth_local):
    """
    Return a string report of the libc and local widths of a character.

    :param ucs: unicode point.
    :type ucs: unicode
    :param wcwidth_libc: libc-wcwidth's reported character length.
    :type wcwidth_libc: int
    :param wcwidth_local: wcwidth's reported character length.
    :type wcwidth_local: int
    :rtype: unicode
    :raises ValueError: when ``ucs`` has no name in ``unicodedata``.
    """
    # Format the code point straight from its ordinal.  Slicing the
    # 'unicode_escape' encoding gives an empty string for printable ASCII
    # (u'A' encodes to just b'A'), which left the URL ending in "U+".
    ucp = format(ord(ucs), 'X')
    url = "http://codepoints.net/U+{}".format(ucp)
    name = unicodedata.name(ucs)
    return (u"libc,ours={},{} [--o{}o--] name={} val={} {}"
            " ".format(wcwidth_libc, wcwidth_local, ucs, name, ord(ucs), url))
# Make unichr() callable on both major versions: py2.x provides it as a
# builtin, py3.x only has chr(), which is bound to the same name here.
try:
    _ = unichr(0)
except NameError as err:
    if err.args[0] != "name 'unichr' is not defined":
        # a NameError about some other name is not handled here
        raise
    # pylint: disable=W0622
    #         Redefining built-in 'unichr' (col 8)
    unichr = chr

# 0x10FFFF == 1114111, the value compared against sys.maxunicode
if sys.maxunicode < 0x10FFFF:
    warnings.warn('narrow Python build, only a small subset of '
                  'characters may be tested.')
def _is_equal_wcwidth(libc, ucs, unicode_version):
    """
    Verify that libc and the local wcwidth agree on the width of ``ucs``.

    :param libc: loaded C library object providing ``wcwidth``.
    :param ucs: a single unicode character.
    :param unicode_version: version passed on to ``wcwidth.wcwidth``.
    :raises AssertionError: when the two widths differ; the message is the
        report built by ``report_ucs_msg``.
    """
    w_libc = libc.wcwidth(ucs)
    w_local = wcwidth.wcwidth(ucs, unicode_version)
    # Raise explicitly rather than through an ``assert`` statement: asserts
    # are compiled out under ``python -O``, which would silently hide every
    # mismatch this program exists to report.
    if w_libc != w_local:
        raise AssertionError(report_ucs_msg(ucs, w_libc, w_local))
def main(using_locale=('en_US', 'UTF-8',)):
    """
    Program entry point.

    Walk the entire Unicode table, excluding code points that:

    - are not named (func unicodedata.name finds no name),
    - are combining characters.

    Using ``locale``, for each unicode character string compare libc's
    wcwidth with local wcwidth.wcwidth() function; when they differ,
    report a detailed AssertionError to stdout.

    The unicode version used by the local library defaults to ``'latest'``
    and may be overridden by the first command-line argument.

    :param using_locale: locale given to ``locale.setlocale`` for LC_ALL.
    :raises ImportError: when the C library, or its ``wcwidth`` or
        ``wcswidth`` function, cannot be found.
    """
    # range() excludes its stop value, so one is added to reach the final
    # code point, sys.maxunicode itself.  The characters are produced lazily
    # instead of first being collected into a throwaway list.
    code_points = (unichr(val) for val in range(sys.maxunicode + 1))
    all_ucs = (ucs for ucs in code_points
               if is_named(ucs) and is_not_combining(ucs))

    libc_name = ctypes.util.find_library('c')
    if not libc_name:
        raise ImportError("Can't find C library.")
    libc = ctypes.cdll.LoadLibrary(libc_name)

    # Check for the functions before touching their attributes, and do so
    # with an explicit raise: an ``assert`` placed after the first use can
    # never fail, and is compiled out under ``python -O`` anyway.
    for symbol in ('wcwidth', 'wcswidth'):
        if getattr(libc, symbol, None) is None:
            raise ImportError("Can't find {}() in C library.".format(symbol))
    libc.wcwidth.argtypes = [ctypes.c_wchar, ]
    libc.wcwidth.restype = ctypes.c_int

    locale.setlocale(locale.LC_ALL, using_locale)

    unicode_version = 'latest'
    if len(sys.argv) > 1:
        unicode_version = sys.argv[1]

    for ucs in all_ucs:
        try:
            _is_equal_wcwidth(libc, ucs, unicode_version)
        except AssertionError as err:
            # a mismatch is reported and the scan continues
            print(err)


if __name__ == '__main__':
    main()
|