"""Tests for width measurement of emoji ZWJ sequences and VS-16 variation sequences."""
# std imports
import os
import codecs
# 3rd party
import pytest
# Python 2 exposes ``unichr`` as a builtin; Python 3 only has ``chr``.
# Probe for the name and alias it when the lookup fails.
try:
    _ = unichr
except NameError:
    unichr = chr

# Some tests cannot be done on 'narrow' builds of python, where the internal
# unicode structure is limited to 0x10000 for memory conservation. On those
# builds the probe below fails with,
# "ValueError: unichr() arg not in range(0x10000) (narrow Python build)"
try:
    unichr(0x2fffe)
except ValueError:
    NARROW_ONLY = True
else:
    NARROW_ONLY = False
# local
import wcwidth
def make_sequence_from_line(line):
    """
    Build the string described by one line of a unicode.org sequence file.

    Only the text before the first ';' is used. It is read as whitespace
    separated hexadecimal codepoints, for example,
    '002A FE0F ; ..' -> (0x2a, 0xfe0f) -> chr(0x2a) + chr(0xfe0f)
    """
    codepoint_field = line.split(';', 1)[0]
    # split() without arguments already discards surrounding whitespace
    hex_values = codepoint_field.split()
    characters = [unichr(int(hex_value, 16)) for hex_value in hex_values]
    return ''.join(characters)
@pytest.mark.skipif(NARROW_ONLY, reason="Test cannot verify on python 'narrow' builds")
def test_emoji_zwj_sequence():
    u"""
    Emoji zwj sequence of four codepoints is just 2 cells.

    Each codepoint is also measured on its own with wcwidth(), where the
    modifier and the joiner are expected to report 0 and both emoji 2.
    """
    phrase = (u"\U0001f469"   # Base, Category So, East Asian Width property 'W' -- WOMAN
              u"\U0001f3fb"   # Modifier, Category Sk, East Asian Width property 'W' -- EMOJI MODIFIER FITZPATRICK TYPE-1-2
              u"\u200d"       # Joiner, Category Cf, East Asian Width property 'N' -- ZERO WIDTH JOINER
              u"\U0001f4bb")  # Fused, Category So, East Asian Width property 'W' -- PERSONAL COMPUTER
    # This test adapted from https://www.unicode.org/L2/L2023/23107-terminal-suppt.pdf
    expect_length_each = (2, 0, 0, 2)
    expect_length_phrase = 2

    # exercise,
    length_each = tuple(map(wcwidth.wcwidth, phrase))
    length_phrase = wcwidth.wcswidth(phrase)

    # verify.
    assert length_each == expect_length_each
    assert length_phrase == expect_length_phrase


# This function used to be defined without the ``test_`` prefix, which meant
# pytest's default discovery never ran it. The old name stays importable.
emoji_zwj_sequence = test_emoji_zwj_sequence
@pytest.mark.skipif(NARROW_ONLY, reason="Test cannot verify on python 'narrow' builds")
def test_unfinished_zwj_sequence():
    u"""
    A zero-width joiner that ends the string, with no character after it,
    must be measured without an index-out-of-bounds error.
    """
    # 'Category Code', 'East Asian Width property' -- 'description'
    woman = u"\U0001f469"      # 'So', 'W' -- WOMAN
    skin_tone = u"\U0001f3fb"  # 'Sk', 'W' -- EMOJI MODIFIER FITZPATRICK TYPE-1-2
    joiner = u"\u200d"         # 'Cf', 'N' -- ZERO WIDTH JOINER
    phrase = woman + skin_tone + joiner

    # exercise,
    widths_by_char = tuple(wcwidth.wcwidth(char) for char in phrase)
    width_of_phrase = wcwidth.wcswidth(phrase)

    # verify.
    assert widths_by_char == (2, 0, 0)
    assert width_of_phrase == 2
@pytest.mark.skipif(NARROW_ONLY, reason="Test cannot verify on python 'narrow' builds")
def test_non_recommended_zwj_sequence():
    """
    Verify ZWJ is measured as though successful with characters that cannot be joined, wcwidth does not verify

    The joiner is followed by a plain latin letter, which is not part of any
    emoji ZWJ sequence. Measured alone the letter is 1 cell, but wcswidth()
    is expected to leave the character after a joiner unmeasured, so the
    whole phrase stays at 2 cells.
    """
    phrase = (u"\U0001f469"   # Base, Category So, East Asian Width property 'W' -- WOMAN
              u"\U0001f3fb"   # Modifier, Category Sk, East Asian Width property 'W' -- EMOJI MODIFIER FITZPATRICK TYPE-1-2
              u"\u200d"       # Joiner, Category Cf, East Asian Width property 'N' -- ZERO WIDTH JOINER
              u"\u0041")      # Not joinable, Category Lu, East Asian Width property 'Na' -- LATIN CAPITAL LETTER A
    expect_length_each = (2, 0, 0, 1)
    expect_length_phrase = 2

    # exercise,
    length_each = tuple(map(wcwidth.wcwidth, phrase))
    length_phrase = wcwidth.wcswidth(phrase)

    # verify.
    assert length_each == expect_length_each
    assert length_phrase == expect_length_phrase
@pytest.mark.skipif(NARROW_ONLY, reason="Test cannot verify on python 'narrow' builds")
def test_another_emoji_zwj_sequence():
    """
    A ZWJ sequence built from narrow symbols and ending in VS-16 is 2 cells.
    """
    codepoints = [
        u"\u26F9",      # PERSON WITH BALL
        u"\U0001F3FB",  # EMOJI MODIFIER FITZPATRICK TYPE-1-2
        u"\u200D",      # ZERO WIDTH JOINER
        u"\u2640",      # FEMALE SIGN
        u"\uFE0F",      # VARIATION SELECTOR-16
    ]
    phrase = u"".join(codepoints)

    # exercise,
    widths_by_char = tuple(wcwidth.wcwidth(char) for char in phrase)
    width_of_phrase = wcwidth.wcswidth(phrase)

    # verify.
    assert widths_by_char == (1, 0, 0, 1, 0)
    assert width_of_phrase == 2
@pytest.mark.skipif(NARROW_ONLY, reason="Test cannot verify on python 'narrow' builds")
def test_longer_emoji_zwj_sequence():
    """
    A ZWJ sequence of 10 codepoints is measured as only 2 cells.

    The sequence is repeated twice in one string, so a single wcswidth() call
    has to handle more than one VS-16 and is expected to report 4 cells.
    """
    # This test adapted from https://www.unicode.org/L2/L2023/23107-terminal-suppt.pdf
    # 'Category Code', 'East Asian Width property' -- 'description'
    single_sequence = (
        u"\U0001F9D1"   # 'So', 'W' -- ADULT
        u"\U0001F3FB"   # 'Sk', 'W' -- EMOJI MODIFIER FITZPATRICK TYPE-1-2
        u"\u200d"       # 'Cf', 'N' -- ZERO WIDTH JOINER
        u"\u2764"       # 'So', 'N' -- HEAVY BLACK HEART
        u"\uFE0F"       # 'Mn', 'A' -- VARIATION SELECTOR-16
        u"\u200d"       # 'Cf', 'N' -- ZERO WIDTH JOINER
        u"\U0001F48B"   # 'So', 'W' -- KISS MARK
        u"\u200d"       # 'Cf', 'N' -- ZERO WIDTH JOINER
        u"\U0001F9D1"   # 'So', 'W' -- ADULT
        u"\U0001F3FD"   # 'Sk', 'W' -- EMOJI MODIFIER FITZPATRICK TYPE-4
    )
    single_widths = (2, 0, 0, 1, 0, 0, 2, 0, 2, 0)
    repeats = 2
    phrase = single_sequence * repeats

    # exercise,
    widths_by_char = tuple(wcwidth.wcwidth(char) for char in phrase)
    width_of_phrase = wcwidth.wcswidth(phrase)

    # verify.
    assert widths_by_char == single_widths * repeats
    assert width_of_phrase == 4
def read_sequences_from_file(filename):
    """
    Read a sequence data file that sits in the same directory as this module.

    Blank lines and lines starting with '#' are dropped, the remaining lines
    are stripped of surrounding whitespace.

    :param filename: name of the utf-8 encoded data file, joined to this
        module's directory.
    :returns: tuple ``(lines, sequences)``, two lists of equal length where
        ``sequences[i]`` is the string built from ``lines[i]`` by
        make_sequence_from_line().
    """
    path = os.path.join(os.path.dirname(__file__), filename)
    # the context manager closes the file even when reading or decoding raises
    with codecs.open(path, 'r', encoding='utf-8') as fp:
        lines = [line.strip()
                 for line in fp.readlines()
                 if not line.startswith('#') and line.strip()]
    sequences = [make_sequence_from_line(line) for line in lines]
    return lines, sequences
@pytest.mark.skipif(NARROW_ONLY, reason="Some sequences in text file are not compatible with 'narrow' builds")
def test_recommended_emoji_zwj_sequences():
    """
    Every sequence listed in the unicode.org-published emoji-zwj-sequences.txt
    must measure exactly 2 cells with wcswidth.
    """
    # given,
    lines, sequences = read_sequences_from_file('emoji-zwj-sequences.txt')

    # exercise, each failure keeps its original text file line, a debugging aide
    failures = []
    total = 0
    for total, (sequence, line) in enumerate(zip(sequences, lines), 1):
        width = wcwidth.wcswidth(sequence)
        if width == 2:
            continue
        failures.append({
            'expected_width': 2,
            'line': line,
            'measured_width': width,
            'sequence': sequence,
        })

    # verify
    assert failures == []
    assert total >= 1468
# NOTE(review): skip added for consistency with the emoji-zwj-sequences.txt
# test -- this data file presumably also lists codepoints above U+FFFF, which
# unichr() rejects on narrow builds; confirm against the data file.
@pytest.mark.skipif(NARROW_ONLY, reason="Some sequences in text file are not compatible with 'narrow' builds")
def test_recommended_variation_16_sequences():
    """
    Test wcswidth of all of the unicode.org-published emoji-variation-sequences.txt

    Only sequences containing U+FE0F (VS-16) are measured, each is expected
    to be 2 cells. Every line of the file is counted, measured or not, to
    verify the data file was read in full.
    """
    # given,
    lines, sequences = read_sequences_from_file('emoji-variation-sequences.txt')
    errors = []

    # exercise, track by zipping with original text file line, a debugging aide
    num = 0
    for sequence, line in zip(sequences, lines):
        num += 1
        # The u'' prefix matters on python 2, where a plain '\ufe0f' is the
        # six-character byte string backslash-u-f-e-0-f and never matches.
        if u'\ufe0f' not in sequence:
            # filter for only \uFE0F (VS-16)
            continue
        measured_width = wcwidth.wcswidth(sequence)
        if measured_width != 2:
            errors.append({
                'expected_width': 2,
                'line': line,
                'measured_width': measured_width,
                'sequence': sequence,
            })

    # verify
    assert errors == []
    assert num >= 742
def test_unicode_9_vs16():
    """With unicode_version 9.0, FEMALE SIGN followed by VS-16 measures 2 cells."""
    female_sign = u"\u2640"  # FEMALE SIGN
    vs16 = u"\uFE0F"         # VARIATION SELECTOR-16
    phrase = female_sign + vs16

    # exercise,
    widths_by_char = tuple([wcwidth.wcwidth(char, unicode_version='9.0')
                            for char in phrase])
    width_of_phrase = wcwidth.wcswidth(phrase, unicode_version='9.0')

    # verify.
    assert widths_by_char == (1, 0)
    assert width_of_phrase == 2
def test_unicode_8_vs16():
    """With unicode_version 8.0, VS-16 has no effect: the phrase stays at 1 cell."""
    female_sign = u"\u2640"  # FEMALE SIGN
    vs16 = u"\uFE0F"         # VARIATION SELECTOR-16
    phrase = female_sign + vs16

    # exercise,
    widths_by_char = tuple([wcwidth.wcwidth(char, unicode_version='8.0')
                            for char in phrase])
    width_of_phrase = wcwidth.wcswidth(phrase, unicode_version='8.0')

    # verify.
    assert widths_by_char == (1, 0)
    assert width_of_phrase == 1
|