# -*- coding: utf-8 -*-
"""
PC-BASIC tests.test_codepage
Codepage functionality tests
(c) 2020--2023 Rob Hagemans
This file is released under the GNU GPL version 3 or later.
"""
from io import open
from pcbasic import Session
from pcbasic.data import read_codepage
from tests.unit.utils import TestCase, run_tests
class CodepageTest(TestCase):
    """Unit tests for codepage functionality."""

    tag = u'codepage'

    def test_nobox(self):
        """Test no box protection.

        With box_protect=False the box-drawing bytes are expected to be
        decoded pairwise as codepage-936 double-byte characters, both in the
        text file and in the unicode view of the screen.
        """
        cp_936 = read_codepage('936')
        with Session(
                codepage=cp_936, box_protect=False, textfile_encoding='utf-8',
                devices={'c': self.output_path()},
            ) as s:
            # to file
            s.execute('open "c:boxtest.txt" for output as 1')
            s.execute('PRINT#1, CHR$(218);STRING$(10,CHR$(196));CHR$(191)')
            # to screen
            s.execute('PRINT CHR$(218);STRING$(10,CHR$(196));CHR$(191)')
            # bytes text
            output_bytes = [_row.strip() for _row in self.get_text(s)]
            # unicode text
            output_unicode = [_row.strip() for _row in self.get_text(s, as_type=type(u''))]
        # NOTE(review): the file is inspected after the session block has
        # exited; presumably that is when file 1 is closed - confirm
        with open(self.output_path('BOXTEST.TXT'), 'r', encoding='utf-8') as f:
            assert f.read() == u'\ufeff谀哪哪哪哪目\n\x1a'
        # the raw bytes on screen are unaffected by the codepage interpretation
        assert output_bytes[0] == b'\xda\xc4\xc4\xc4\xc4\xc4\xc4\xc4\xc4\xc4\xc4\xbf'
        assert output_unicode[0] == u'谀哪哪哪哪目'

    def test_box(self):
        """Test box protection.

        With box_protect=True the same byte sequence as in test_nobox is
        expected to come out as box-drawing characters in unicode.
        """
        cp_936 = read_codepage('936')
        with Session(
                codepage=cp_936, box_protect=True, textfile_encoding='utf-8',
                devices={'c': self.output_path()},
            ) as s:
            # to file
            s.execute('open "c:boxtest.txt" for output as 1')
            s.execute('PRINT#1, CHR$(218);STRING$(10,CHR$(196));CHR$(191)')
            # to screen
            s.execute('PRINT CHR$(218);STRING$(10,CHR$(196));CHR$(191)')
            # bytes text
            output_bytes = [_row.strip() for _row in self.get_text(s)]
            # unicode text
            output_unicode = [_row.strip() for _row in self.get_text(s, as_type=type(u''))]
        # NOTE(review): the file is inspected after the session block has
        # exited; presumably that is when file 1 is closed - confirm
        with open(self.output_path('BOXTEST.TXT'), 'r', encoding='utf-8') as f:
            assert f.read() == u'\ufeff┌──────────┐\n\x1a'
        assert output_bytes[0] == b'\xda\xc4\xc4\xc4\xc4\xc4\xc4\xc4\xc4\xc4\xc4\xbf'
        assert output_unicode[0] == u'┌──────────┐'

    def test_box2(self):
        """Test box protection cases."""
        cp_936 = read_codepage('936')
        with Session(codepage=cp_936, box_protect=True) as s:
            s.execute('a$= "+"+STRING$(3,CHR$(196))+"+"')
            s.execute('b$= "+"+STRING$(2,CHR$(196))+"+"')
            s.execute('c$= "+"+STRING$(1,CHR$(196))+"+"')
            s.execute('d$= "+"+CHR$(196)+chr$(196)+chr$(190)+chr$(196)+"+"')
            # the byte representation is the same regardless of protection
            assert s.get_variable('a$') == b'+\xc4\xc4\xc4+'
            assert s.get_variable('b$') == b'+\xc4\xc4+'
            assert s.get_variable('c$') == b'+\xc4+'
            assert s.get_variable('d$') == b'+\xc4\xc4\xbe\xc4+'
            # three consecutive lines are protected
            assert s.get_variable('a$', as_type=type(u'')) == u'+\u2500\u2500\u2500+'
            # two consecutive lines are not
            assert s.get_variable('b$', as_type=type(u'')) == u'+\u54ea+'
            # single lead byte is shown as box drawing
            assert s.get_variable('c$', as_type=type(u'')) == u'+\u2500+'
            # two box lines followed by a non-box lead & trail byte - not protected
            assert s.get_variable('d$', as_type=type(u'')) == u'+\u54ea\u7078+'

    def test_hello(self):
        """Round-trip a 'hello world' greeting through each listed codepage.

        Each greeting is written to a UTF-8 file named after the greeting
        itself, printed to the screen, and read back with LINE INPUT; both
        the variable and the first screen row must equal the original text.
        """
        hello = {
            # contains \u064b which is not in 720
            #'720': u'أهلاً بالعالم',
            '720': u'أهلا بالعالم',
            '737': u'Γεια σου κόσμε',
            '862': u'שלום עולם',
            '866': u'Здравствуй, мир',
            # combining graphemes \u0e27\u0e31 \u0e14\u0e35 are in codepage as separate chars
            # so converting to bytes fails
            #'874': u'สวัสดีโลก',
            #'874': u'\u0e2a\u0e27\u0e31\u0e2a\u0e14\u0e35\u0e42\u0e25\u0e01',
            '932': u'こんにちは、 世界',
            '936': u'你好世界',
            '949': u'반갑다 세상아',
            'viscii': u'Xin chào thế giới',
        }
        # note that we're making a round-trip conversion unicode -> codepage -> unicode
        # this doesn't always work
        for cp, hi in hello.items():
            with open(self.output_path(hi), 'w', encoding='utf-8') as f:
                f.write(hi)
            cp_dict = read_codepage(cp)
            with Session(
                    codepage=cp_dict, textfile_encoding='utf-8',
                    devices={'c': self.output_path()},
                ) as s:
                s.execute(u'cls:print "{}"'.format(hi))
                #TODO: (api) should have an errors= option in convert?
                #TODO: (codepages) only perform grapheme clustering if the codepage has actual clusters in code points? (but: non-canonical combinations) override clustering if clustering elements in codepage?
                #cp_inv = {_v: _k for _k, _v in cp_dict.items()}
                #print repr(hi), repr(s.convert(hi, to_type=type(b''))), repr([cp_inv[x] for x in hi])
                s.execute(u'open "c:{}" for input as 1'.format(hi))
                s.execute('line input#1, a$')
                assert s.get_variable('a$', as_type=type(u'')) == hi
                output_unicode = [_row.strip() for _row in self.get_text(s, as_type=type(u''))]
                assert output_unicode[0] == hi

    def test_missing(self):
        """Test codepage with missing codepoints."""
        # only byte 0xff is defined in this codepage
        cp = {b'\xff': u'B'}
        with Session(codepage=cp) as s:
            s.execute('a$ = "abcde" + chr$(255)')
            assert s.get_variable('a$') == b'abcde\xff'
            # bytes absent from the codepage are expected to come out as NUL
            assert s.get_variable('a$', as_type=type(u'')) == u'\0\0\0\0\0B'

    def test_non_nfc(self):
        """Test conversion of non-NFC sequences."""
        with Session() as s:
            # a-acute in NFD
            s.execute(u'a$ = "a\u0301"')
            # codepage 437 for a-acute
            assert s.get_variable('a$') == b'\xa0'

    def test_lone_nul(self):
        """Test converting a lone NUL from unicode to bytes."""
        with Session() as s:
            bstr = s.convert(u'\0', to_type=type(b''))
            assert bstr == b'\0', bstr

    def test_eascii(self):
        """Test converting an eascii sequence from unicode to bytes."""
        with Session() as s:
            bstr = s.convert(u'\0\1', to_type=type(b''))
            assert bstr == b'\0\1', bstr

    def test_control(self):
        """Test converting a control character from unicode to bytes."""
        with Session() as s:
            bstr = s.convert(u'\r', to_type=type(b''))
            assert bstr == b'\r', bstr

    def test_grapheme_sequence(self):
        """Test converting a multi-codepoint grapheme sequence."""
        cp = read_codepage('russup4ac')
        with Session(codepage=cp) as s:
            # four code points are expected to yield three bytes: the base
            # letter plus combining acute (\u041e\u0301) becomes a single byte
            bstr = s.convert(u'\u041e\u041e\u0301\u263a', to_type=type(b''))
            assert bstr == b'\x8e\xc5\1', bstr
##############################################################################
from io import StringIO, BytesIO
import pickle
from pcbasic.compat import copyreg
from pcbasic.basic.codepage import InputStreamWrapper, OutputStreamWrapper, NewlineWrapper
from pcbasic.basic.codepage import Codepage
#from pcbasic import state
def unpickle_stringio(buffer, pos):
    """Rebuild a StringIO from its saved contents and stream position.

    buffer: the complete text the stream should hold.
    pos: the position to seek to after construction.

    Returns a new StringIO containing `buffer`, positioned at `pos`.
    """
    f = StringIO(buffer)
    f.seek(pos)
    return f
def pickle_stringio(f):
    """Reduce a StringIO to a (callable, args) pair for pickling.

    f: the StringIO to reduce.

    Returns unpickle_stringio together with the stream's full contents and
    current position, so that calling it with those arguments recreates an
    equivalent stream.
    """
    return unpickle_stringio, (f.getvalue(), f.tell())
# Register pickle_stringio as the reduction function for StringIO, so that
# StringIO objects are pickled as their contents plus stream position.
# NOTE(review): copyreg is imported from pcbasic.compat; presumably a shim
# over the standard-library copyreg/copy_reg module - confirm.
copyreg.pickle(StringIO, pickle_stringio)
class StreamWrapperTest(TestCase):
    """Unit tests for stream wrappers."""

    tag = u'codepage'

    def test_read(self):
        """Test InputStreamWrapper.read().

        Reading from a wrapped unicode stream must yield codepage bytes,
        both for sized reads and for a read to end of stream.
        """
        # unicode stream
        stream = StringIO(u'£abcde£')
        # use default codepage 437
        wrapper = InputStreamWrapper(stream, Codepage())
        # read codepage bytes
        assert wrapper.read(1) == b'\x9c'
        assert wrapper.read(1) == b'a'
        assert wrapper.read() == b'bcde\x9c'

    def test_write(self):
        """Test OutputStreamWrapper.write().

        Codepage bytes written to the wrapper must arrive in the underlying
        stream as unicode text.
        """
        stream = StringIO()
        # use default codepage 437
        wrapper = OutputStreamWrapper(stream, Codepage())
        wrapper.write(b'\x9cabcde\x9c')
        assert stream.getvalue() == u'£abcde£'

    def test_pickle(self):
        """Wrapped streams must be picklable."""
        # unicode stream
        stream = StringIO(u'£abcde£')
        # use default codepage 437
        wrapper = InputStreamWrapper(stream, Codepage())
        # consume part of the stream so the position must survive the round trip
        wrapper.read(2)
        pstr = pickle.dumps(wrapper)
        wrapper2 = pickle.loads(pstr)
        # the restored wrapper continues where the original left off
        assert wrapper2.read() == b'bcde\x9c'

    def test_newline_read(self):
        """Exercise NewlineWrapper.

        The expected values show CR LF and a bare LF both being delivered
        as a single CR, while a bare CR passes through unchanged.
        """
        stream = BytesIO(b'1\r\n2\r3\n')
        wrapper = NewlineWrapper(stream)
        # a zero-length read returns nothing
        assert wrapper.read(0) == b''
        assert wrapper.read(2) == b'1\r'
        assert wrapper.read() == b'2\r3\r'
if __name__ == '__main__':
    # when executed as a script, hand over to the run_tests helper
    # imported from tests.unit.utils
    run_tests()