File: test_codepage.py

package info (click to toggle)
pcbasic 2.0.7-8
  • links: PTS
  • area: main
  • in suites: forky, sid
  • size: 35,416 kB
  • sloc: python: 28,411; sh: 103; makefile: 10
file content (235 lines) | stat: -rw-r--r-- 9,394 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
# -*- coding: utf-8 -*-

"""
PC-BASIC tests.test_codepage
Codepage functionality tests

(c) 2020--2023 Rob Hagemans
This file is released under the GNU GPL version 3 or later.
"""

from io import open

from pcbasic import Session
from pcbasic.data import read_codepage

from tests.unit.utils import TestCase, run_tests


class CodepageTest(TestCase):
    """Tests for codepage conversion through the Session interface."""

    tag = u'codepage'

    def test_nobox(self):
        """Box-drawing bytes pair up as double-byte characters without box protection."""
        session_options = dict(
            codepage=read_codepage('936'),
            box_protect=False,
            textfile_encoding='utf-8',
            devices={'c': self.output_path()},
        )
        with Session(**session_options) as session:
            # to file
            session.execute('open "c:boxtest.txt" for output as 1')
            session.execute('PRINT#1, CHR$(218);STRING$(10,CHR$(196));CHR$(191)')
            # to screen
            session.execute('PRINT CHR$(218);STRING$(10,CHR$(196));CHR$(191)')
            # screen contents as bytes
            screen_bytes = [_line.strip() for _line in self.get_text(session)]
            # screen contents as unicode
            screen_text = [
                _line.strip() for _line in self.get_text(session, as_type=type(u''))
            ]
        # the text file is only inspected once the session has been closed
        with open(self.output_path('BOXTEST.TXT'), 'r', encoding='utf-8') as textfile:
            file_text = textfile.read()
        assert file_text == u'\ufeff谀哪哪哪哪目\n\x1a'
        assert screen_bytes[0] == b'\xda\xc4\xc4\xc4\xc4\xc4\xc4\xc4\xc4\xc4\xc4\xbf'
        assert screen_text[0] == u'谀哪哪哪哪目'

    def test_box(self):
        """Box-drawing bytes stay box-drawing characters with box protection on."""
        session_options = dict(
            codepage=read_codepage('936'),
            box_protect=True,
            textfile_encoding='utf-8',
            devices={'c': self.output_path()},
        )
        with Session(**session_options) as session:
            # to file
            session.execute('open "c:boxtest.txt" for output as 1')
            session.execute('PRINT#1, CHR$(218);STRING$(10,CHR$(196));CHR$(191)')
            # to screen
            session.execute('PRINT CHR$(218);STRING$(10,CHR$(196));CHR$(191)')
            # screen contents as bytes
            screen_bytes = [_line.strip() for _line in self.get_text(session)]
            # screen contents as unicode
            screen_text = [
                _line.strip() for _line in self.get_text(session, as_type=type(u''))
            ]
        # the text file is only inspected once the session has been closed
        with open(self.output_path('BOXTEST.TXT'), 'r', encoding='utf-8') as textfile:
            file_text = textfile.read()
        assert file_text == u'\ufeff┌──────────┐\n\x1a'
        assert screen_bytes[0] == b'\xda\xc4\xc4\xc4\xc4\xc4\xc4\xc4\xc4\xc4\xc4\xbf'
        assert screen_text[0] == u'┌──────────┐'

    def test_box2(self):
        """Check which byte runs box protection does and does not cover."""
        # BASIC assignments that build the strings under test
        definitions = (
            'a$= "+"+STRING$(3,CHR$(196))+"+"',
            'b$= "+"+STRING$(2,CHR$(196))+"+"',
            'c$= "+"+STRING$(1,CHR$(196))+"+"',
            'd$= "+"+CHR$(196)+chr$(196)+chr$(190)+chr$(196)+"+"',
        )
        # (variable, expected bytes value, expected unicode value)
        expectations = (
            # three consecutive lines are protected
            ('a$', b'+\xc4\xc4\xc4+', u'+\u2500\u2500\u2500+'),
            # two consecutive lines are not
            ('b$', b'+\xc4\xc4+', u'+\u54ea+'),
            # single lead byte is shown as box drawing
            ('c$', b'+\xc4+', u'+\u2500+'),
            # two box lines followed by a non-box lead & trail byte - not protected
            ('d$', b'+\xc4\xc4\xbe\xc4+', u'+\u54ea\u7078+'),
        )
        with Session(codepage=read_codepage('936'), box_protect=True) as session:
            for statement in definitions:
                session.execute(statement)
            # the raw bytes are stored untouched in every case
            for name, raw, _ in expectations:
                assert session.get_variable(name) == raw
            # protection only shows up in the conversion to unicode
            for name, _, text in expectations:
                assert session.get_variable(name, as_type=type(u'')) == text

    def test_hello(self):
        """Round-trip a greeting through a number of codepages."""
        greetings = {
            # contains \u064b which is not in 720
            #'720': u'أهلاً بالعالم',
            '720': u'أهلا بالعالم',
            '737': u'Γεια σου κόσμε',
            '862': u'שלום עולם',
            '866': u'Здравствуй, мир',
            # combining graphemes \u0e27\u0e31 \u0e14\u0e35 are in codepage as separate chars
            # so converting to bytes fails
            #'874': u'สวัสดีโลก',
            #'874': u'\u0e2a\u0e27\u0e31\u0e2a\u0e14\u0e35\u0e42\u0e25\u0e01',
            '932': u'こんにちは、 世界',
            '936': u'你好世界',
            '949': u'반갑다 세상아',
            'viscii': u'Xin chào thế giới',
        }
        # each greeting goes unicode -> codepage -> unicode;
        # such a round trip is not guaranteed to work for every text
        for codepage_name, greeting in greetings.items():
            # the greeting doubles as the name and the content of a utf-8 text file
            with open(self.output_path(greeting), 'w', encoding='utf-8') as textfile:
                textfile.write(greeting)
            codepage = read_codepage(codepage_name)
            session_options = dict(
                codepage=codepage,
                textfile_encoding='utf-8',
                devices={'c': self.output_path()},
            )
            with Session(**session_options) as session:
                session.execute(u'cls:print "{}"'.format(greeting))

                #TODO: (api) should have an errors= option in convert?
                #TODO: (codepages) only perform grapheme clustering if the codepage has actual clusters in code points? (but: non-canonical combinations) override clustering if clustering elements in codepage?
                # debugging aid, kept for reference:
                #cp_inv = {_v: _k for _k, _v in cp_dict.items()}
                #print repr(hi), repr(s.convert(hi, to_type=type(b''))), repr([cp_inv[x] for x in hi])

                # read the greeting back from the file into a BASIC variable
                session.execute(u'open "c:{}" for input as 1'.format(greeting))
                session.execute('line input#1, a$')
                assert session.get_variable('a$', as_type=type(u'')) == greeting
                # and compare what was printed on the first screen row
                screen_text = [
                    _line.strip() for _line in self.get_text(session, as_type=type(u''))
                ]
                assert screen_text[0] == greeting

    def test_missing(self):
        """Bytes absent from the codepage come out as NUL when converted to unicode."""
        sparse_codepage = {b'\xff': u'B'}
        with Session(codepage=sparse_codepage) as session:
            session.execute('a$ = "abcde" + chr$(255)')
            assert session.get_variable('a$') == b'abcde\xff'
            assert session.get_variable('a$', as_type=type(u'')) == u'\0\0\0\0\0B'

    def test_non_nfc(self):
        """A decomposed (non-NFC) sequence converts to its single codepage byte."""
        with Session() as session:
            # a-acute in NFD
            session.execute(u'a$ = "a\u0301"')
            # codepage 437 for a-acute
            assert session.get_variable('a$') == b'\xa0'

    def test_lone_nul(self):
        """A lone NUL converts from unicode to a single NUL byte."""
        with Session() as session:
            converted = session.convert(u'\0', to_type=type(b''))
        assert converted == b'\0', converted

    def test_eascii(self):
        """An eascii sequence converts from unicode to bytes unchanged."""
        with Session() as session:
            converted = session.convert(u'\0\1', to_type=type(b''))
        assert converted == b'\0\1', converted

    def test_control(self):
        """A control character converts from unicode to the same byte."""
        with Session() as session:
            converted = session.convert(u'\r', to_type=type(b''))
        assert converted == b'\r', converted

    def test_grapheme_sequence(self):
        """A multi-codepoint grapheme sequence converts to a single byte."""
        codepage = read_codepage('russup4ac')
        with Session(codepage=codepage) as session:
            converted = session.convert(u'\u041e\u041e\u0301\u263a', to_type=type(b''))
        assert converted == b'\x8e\xc5\1', converted


##############################################################################

from io import StringIO, BytesIO
import pickle
from pcbasic.compat import copyreg

from pcbasic.basic.codepage import InputStreamWrapper, OutputStreamWrapper, NewlineWrapper
from pcbasic.basic.codepage import Codepage
#from pcbasic import state


def unpickle_stringio(buffer, pos):
    f = StringIO(buffer)
    f.seek(pos)
    return f

def pickle_stringio(f):
    """Reduce StringIO `f` to a (constructor, arguments) pair for pickling."""
    # the full contents and the current position are all the state that is kept
    saved_state = (f.getvalue(), f.tell())
    return unpickle_stringio, saved_state

# register pickle_stringio as the reduction function used when pickling a StringIO
# NOTE(review): copyreg is taken from pcbasic.compat; presumably the stdlib copyreg/copy_reg - confirm
copyreg.pickle(StringIO, pickle_stringio)


class StreamWrapperTest(TestCase):
    """Tests for the codepage stream wrappers and the newline wrapper."""

    tag = u'codepage'

    def test_read(self):
        """InputStreamWrapper.read() returns codepage bytes from a unicode stream."""
        # unicode source stream, wrapped with the default codepage 437
        reader = InputStreamWrapper(StringIO(u'£abcde£'), Codepage())
        # single-character reads come back as single codepage bytes
        first = reader.read(1)
        assert first == b'\x9c'
        second = reader.read(1)
        assert second == b'a'
        # a read without a count returns the rest of the stream
        remainder = reader.read()
        assert remainder == b'bcde\x9c'

    def test_write(self):
        """OutputStreamWrapper.write() stores codepage bytes as unicode text."""
        target = StringIO()
        writer = OutputStreamWrapper(target, Codepage())
        writer.write(b'\x9cabcde\x9c')
        written = target.getvalue()
        assert written == u'£abcde£'

    def test_pickle(self):
        """A wrapped stream survives pickling with its read position intact."""
        # unicode source stream, wrapped with the default codepage 437
        reader = InputStreamWrapper(StringIO(u'£abcde£'), Codepage())
        # advance past the first two characters before pickling
        reader.read(2)
        clone = pickle.loads(pickle.dumps(reader))
        # the copy continues where the original left off
        assert clone.read() == b'bcde\x9c'

    def test_newline_read(self):
        """NewlineWrapper.read() with a zero count, a count and no count."""
        source = BytesIO(b'1\r\n2\r3\n')
        reader = NewlineWrapper(source)
        nothing = reader.read(0)
        assert nothing == b''
        head = reader.read(2)
        assert head == b'1\r'
        tail = reader.read()
        assert tail == b'2\r3\r'


# when executed as a script, hand over to the run_tests helper from tests.unit.utils
if __name__ == '__main__':
    run_tests()