File: test_regalloc.py

package info (click to toggle)
pypy3 7.3.19%2Bdfsg-2
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 212,236 kB
  • sloc: python: 2,098,316; ansic: 540,565; sh: 21,462; asm: 14,419; cpp: 4,451; makefile: 4,209; objc: 761; xml: 530; exp: 499; javascript: 314; pascal: 244; lisp: 45; csh: 12; awk: 4
file content (436 lines) | stat: -rw-r--r-- 14,759 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
""" explicit integration tests for register allocation in the x86 backend """

import pytest

from rpython.jit.backend.llsupport.test import test_regalloc_integration
from rpython.jit.backend.x86.assembler import Assembler386
from rpython.jit.backend.x86.arch import IS_X86_64, WIN64

class LogEntry(object):
    def __init__(self, position, name, *args):
        self.position = position
        self.name = name
        self.args = args

    def __repr__(self):
        r = repr(self.args)
        if self.name == "op":
            r = repr(self.args[1:])
            return "<%s: %s %s %s>" % (self.position, self.name, self.args[0], r.strip("(),"))
        return "<%s: %s %s>" % (self.position, self.name, r.strip("(),"))

class LoggingAssembler(Assembler386):
    def __init__(self, *args, **kwargs):
        self._instr_log = []
        Assembler386.__init__(self, *args, **kwargs)

    def _log(self, name, *args):
        self._instr_log.append(LogEntry(self._regalloc.rm.position, name, *args))

    def mov(self, from_loc, to_loc):
        self._log("mov", from_loc, to_loc)
        return Assembler386.mov(self, from_loc, to_loc)

    def regalloc_mov(self, from_loc, to_loc):
        self._log("mov", from_loc, to_loc)
        return Assembler386.mov(self, from_loc, to_loc)

    def regalloc_perform(self, op, arglocs, resloc):
        self._log("op", op.getopname(), arglocs, resloc)
        return Assembler386.regalloc_perform(self, op, arglocs, resloc)

    def regalloc_perform_discard(self, op, arglocs):
        self._log("op", op.getopname(), arglocs)
        return Assembler386.regalloc_perform_discard(self, op, arglocs)

    def regalloc_perform_guard(self, guard_op, faillocs, arglocs, resloc,
                               frame_depth):
        self._log("guard", guard_op.getopname(), arglocs, faillocs, resloc)
        return Assembler386.regalloc_perform_guard(self, guard_op, faillocs,
                arglocs, resloc, frame_depth)

    def malloc_cond(self, nursery_free_adr, nursery_top_adr, size, gcmap):
        self._log("malloc_cond", size, "ecx") # always uses edx and ecx

    def label(self):
        self._log("label", self._regalloc.final_jump_op.getdescr()._x86_arglocs)
        return Assembler386.label(self)

    def closing_jump(self, jump_target_descr):
        self._log("jump", self._regalloc.final_jump_op.getdescr()._x86_arglocs)
        return Assembler386.closing_jump(self, jump_target_descr)

class BaseTestCheckRegistersExplicitly(test_regalloc_integration.BaseTestRegalloc):
    def setup_class(cls):
        cls.cpu.assembler = LoggingAssembler(cls.cpu, False)
        cls.cpu.assembler.setup_once()

    def setup_method(self, meth):
        self.cpu.assembler._instr_log = self.log = []

    def teardown_method(self, meth):
        for l in self.log:
            print l

    def filter_log_moves(self):
        return [entry for entry in self.log if entry.name == "mov"]

class TestCheckRegistersExplicitly(BaseTestCheckRegistersExplicitly):
    def test_unused(self):
        ops = '''
        [i0, i1, i2, i3]
        i7 = int_add(i0, i1) # unused
        i9 = int_add(i2, i3)
        finish(i9)
        '''
        # does not crash
        self.interpret(ops, [5, 6, 7, 8])
        assert len([entry for entry in self.log if entry.args[0] == "int_add"]) == 1

    def test_bug_const(self):
        ops = '''
        [i0, i1, i2, i3]
        i9 = int_add(1, i3)
        finish(i9)
        '''
        # does not crash
        self.interpret(ops, [5, 6, 7, 8])
        assert len([entry for entry in self.log if entry.args[0] == "int_add"]) == 1

    def test_use_lea_even_for_stack(self):
        ops = '''
        [i0, i1, i2, i3]
        i9 = int_add(i3, 16)
        i4 = int_add(i3, 26)
        i6 = int_add(i9, i4)
        finish(i6)
        '''
        self.interpret(ops, [5, 6, 7, 8])
        assert len(self.filter_log_moves()) == 2

    def test_call_use_correct_regs(self):
        ops = '''
        [i0, i1, i2, i3]
        i7 = int_add(i0, i1)
        i8 = int_add(i2, 13)
        i9 = call_i(ConstClass(f1ptr), i7, descr=f1_calldescr)
        i10 = int_is_true(i9)
        guard_true(i10) [i8]
        finish(i9)
        '''
        self.interpret(ops, [5, 6, 7, 8])
        # two moves are needed from the stack frame to registers for arguments
        # i0 and i1, one for the result to the stack
        assert len(self.filter_log_moves()) == 3

    @pytest.mark.skip("later")
    def test_same_stack_entry_many_times(self):
        ops = '''
        [i0, i1, i2, i3]
        i7 = int_add(i0, i1)
        i8 = int_add(i2, i1)
        i9 = int_add(i3, i1)
        i10 = int_is_true(i9)
        guard_true(i10) [i8]
        finish(i7)
        '''
        self.interpret(ops, [5, 6, 7, 8])
        # 4 moves for arguments, 1 for result
        assert len(self.filter_log_moves()) == 5

    def test_coalescing(self):
        ops = '''
        [i0, i1, i3]
        i7 = int_add(i0, i1)
        i8 = int_add(i7, i3)
        i9 = call_i(ConstClass(f1ptr), i8, descr=f1_calldescr)
        i10 = int_is_true(i9)
        guard_true(i10) []
        finish(i9)
        '''
        self.interpret(ops, [5, 6, 8])
        # coalescing makes sure that i0 (and thus i8) lands in edi
        assert len(self.filter_log_moves()) == 2

    def test_coalescing_sub(self):
        ops = '''
        [i0, i1, i3]
        i7 = int_sub(i0, i1)
        i8 = int_sub(i7, i3)
        i9 = call_i(ConstClass(f1ptr), i8, descr=f1_calldescr)
        i10 = int_is_true(i9)
        guard_true(i10) []
        finish(i9)
        '''
        self.interpret(ops, [5, 6, 8])
        # coalescing makes sure that i7 (and thus i8) lands in edi
        assert len(self.filter_log_moves()) == 2

    def test_coalescing_mul(self):
        # won't test all symmetric operations, but at least check a second one
        ops = '''
        [i0, i1, i3]
        i7 = int_mul(i0, i1)
        i8 = int_mul(i7, i3)
        i9 = call_i(ConstClass(f1ptr), i8, descr=f1_calldescr)
        i10 = int_is_true(i9)
        guard_true(i10) []
        finish(i9)
        '''
        self.interpret(ops, [5, 6, 8])
        assert len(self.filter_log_moves()) == 2

    def test_lshift(self):
        ops = '''
        [i0, i1, i2, i3]
        i5 = int_add(i2, i3)
        i7 = int_lshift(i0, i5)
        i8 = int_lshift(i7, i3)
        i9 = call_i(ConstClass(f1ptr), 42, i8, descr=f2_calldescr)
        i10 = int_is_true(i9)
        guard_true(i10) []
        finish(i9)
        '''
        self.interpret(ops, [5, 6, 7, 8])
        if IS_X86_64:
            # 3 moves for arguments, 1 move for the constant 42, 1 move for result
            assert len(self.filter_log_moves()) == 5
        else:
            assert len(self.filter_log_moves()) == 4

    def test_binop_dont_swap_unnecessarily(self):
        ops = '''
        [i0, i1, i2, i3]
        i7 = int_add(i0, i1)
        i8 = int_add(i2, 13)
        i9 = int_add(i7, i8)
        i10 = int_is_true(i9)
        guard_true(i10) []
        finish(i9)
        '''
        self.interpret(ops, [5, 6, 7, 8])
        add1 = self.log[2]
        op = self.log[5]
        assert op.name == "op"
        # make sure that the arguments of the third op are not swapped (since
        # that would break coalescing between i7 and i9)
        assert op.args[1][0] is add1.args[-1]

    def test_jump_hinting(self):
        self.targettoken._ll_loop_code = 0
        ops = '''
        [i0]
        i1 = int_add(i0, 1)
        i10 = int_add(i1, 1)
        i2 = int_add(i1, 1)
        i3 = int_lt(i2, 20)
        guard_true(i3) [i1, i10]
        label(i2, descr=targettoken)
        i4 = int_add(i2, 1)
        i11 = int_add(i4, 1)
        i5 = int_add(i4, 1)
        i6 = int_lt(i5, 20)
        guard_true(i6) [i4, i11]
        jump(i5, descr=targettoken)
        '''
        self.interpret(ops, [0], run=False)
        assert len(self.filter_log_moves()) == 1

    def test_jump_hinting_duplicate(self):
        self.targettoken._ll_loop_code = 0
        ops = '''
        [i0]
        i1 = int_add(i0, 1)
        i10 = int_add(i1, 1)
        i2 = int_add(i1, 1)
        i3 = int_lt(i2, 20)
        guard_true(i3) [i1, i10]
        label(i2, i10, descr=targettoken)
        i4 = int_add(i2, 1)
        i11 = int_add(i4, i10)
        i5 = int_add(i4, 1)
        i6 = int_lt(i5, 20)
        guard_true(i6) [i4, i11]
        jump(i5, i5, descr=targettoken)
        '''
        self.interpret(ops, [0], run=False)
        assert len(self.filter_log_moves()) == 3


    def test_jump_hinting_int_add(self):
        self.targettoken._ll_loop_code = 0
        ops = '''
        [i0]
        i1 = int_add(i0, 1)
        i3 = int_lt(i1, 20)
        guard_true(i3) [i1]
        label(i1, descr=targettoken)
        i4 = int_add(i1, 1)
        i6 = int_lt(i4, 20)
        guard_true(i6) [i4]
        jump(i4, descr=targettoken)
        '''
        self.interpret(ops, [0], run=False)
        assert len(self.filter_log_moves()) == 1

    @pytest.mark.skip("later")
    def test_jump_different_args2(self):
        ops = '''
        [i0, i4, i6]
        i1 = int_add(i0, i6)
        i2 = int_lt(i1, 20)
        guard_true(i2) [i1]
        label(i4, i1, i6, descr=targettoken)
        i3 = int_add(i4, i6)
        i7 = int_lt(i3, 20)
        guard_true(i7) [i3]
        jump(i1, i3, i6, descr=targettoken)
        '''
        self.interpret(ops, [0], run=False)


    def test_flowcontext(self):
        # real index manipulation for a slicing operation done when translating
        # on top of pypy
        ops = """
        [i1, i2]
        i3 = int_and(i1, 255)
        i4 = int_rshift(i1, 8)
        i5 = int_and(i4, 255)
        i6 = int_lt(0, i5)
        guard_false(i6) [i1]
        i7 = int_eq(i3, 0)
        guard_false(i7) [i1]
        i8 = int_neg(i3)
        i9 = int_lt(i8, 0)
        guard_true(i9) [i1]
        i10 = int_lt(i2, 0)
        guard_false(i10) [i1]
        i11 = int_add(i8, i2)
        i12 = int_lt(i11, 0)
        guard_false(i12) [i1]
        i13 = int_gt(i11, i2)
        guard_false(i13) [i1]
        i14 = int_sub(i2, i11)
        i15 = int_is_zero(i14)
        guard_false(i15) [i1]
        # this simulates the arraycopy call
        i16 = call_i(ConstClass(f2ptr), i11, i14, descr=f2_calldescr)
        finish(i16)
        """
        self.interpret(ops, [0], run=False)
        # 4 moves, three for args, one for result
        assert len(self.filter_log_moves()) == 4

class TestCheckRegistersExplicitly64(BaseTestCheckRegistersExplicitly):
    def setup_class(self):
        if not IS_X86_64:
            pytest.skip("needs 64 bit")

    def test_call_use_argument_twice(self):
        ops = '''
        [i0, i1, i2, i3]
        i7 = int_add(i0, i1)
        i8 = int_add(i2, 13)
        i9 = call_i(ConstClass(f2ptr), i7, i7, descr=f2_calldescr)
        i10 = int_is_true(i9)
        guard_true(i10) [i8]
        finish(i9)
        '''
        self.interpret(ops, [5, 6, 7, 8])
        # two moves are needed from the stack frame to registers for arguments
        # i0 and i1, one for the result to the stack
        # one for the copy to the other argument register
        assert len(self.filter_log_moves()) == 4

    def test_coalescing_float(self):
        ops = '''
        [f0, f1, f3]
        f7 = float_add(f0, f1)
        f8 = float_add(f7, f3)
        f9 = call_f(ConstClass(ffptr), f8, 1.0, descr=ff_calldescr)
        i10 = float_ne(f9, 0.0)
        guard_true(i10) []
        finish(f9)
        '''
        self.interpret(ops, [5.0, 6.0, 8.0])
        assert len(self.filter_log_moves()) == 3

    def test_malloc(self, monkeypatch):
        ops = '''
        [i0]
        i1 = int_add(i0, 1) # this is using ecx or edx because it fits
        i6 = int_add(i0, 6) # this is using ecx or edx because it fits
        i2 = int_add(i6, i1)
        p0 = call_malloc_nursery(16)
        gc_store(p0, 0, 83944, 8)
        gc_store(p0, 8, i2, 8)
        i10 = int_is_true(i2)
        guard_true(i10) [p0, i0]
        finish(p0)
        '''
        monkeypatch.setattr(self.cpu.gc_ll_descr, "get_nursery_top_addr", lambda: 61)
        monkeypatch.setattr(self.cpu.gc_ll_descr, "get_nursery_free_addr", lambda: 68)
        self.interpret(ops, [0], run=False)
        # 2 moves, because the call_malloc_nursery hints prevent using ecx and
        # edx for any of the integer results
        assert len(self.filter_log_moves()) == 2

    def test_dict_lookup(self, monkeypatch):
        monkeypatch.setattr(self.cpu.gc_ll_descr, "get_nursery_top_addr", lambda: 61)
        monkeypatch.setattr(self.cpu.gc_ll_descr, "get_nursery_free_addr", lambda: 68)
        # real trace for a dict lookup
        ops = """
        [i172, i182, i201, p209, p0, p219]
        i184 = int_lt(i172, 0)
        guard_false(i184) []
        i185 = int_ge(i172, i182)
        guard_false(i185) []
        i187 = int_add(i172, 1)
        i202 = uint_ge(i172, i201)
        guard_false(i202) [i172]
        i221 = int_xor(i172, 3430008)
        i223 = int_mul(i221, 1000003)
        i230 = call_i(ConstClass(fgcrefptr), p209, descr=fgcref_calldescr)
        guard_no_exception() [p209, i230, i172]
        i232 = int_eq(i230, -1)
        i233 = int_sub(i230, i232)
        i234 = int_xor(i223, i233)
        i236 = int_mul(i234, 1082525)
        i238 = int_add(i236, 97531)
        i240 = int_eq(i238, -1)
        i241 = int_sub(i238, i240)
        p242 = force_token()
        p244 = call_malloc_nursery(40)
        gc_store(p244, 0, 83568, 8)
        p249 = nursery_ptr_increment(p244, 24)
        gc_store(p249, 0, 4656, 8)
        gc_store(p249, 8, i172, 8)
        #cond_call_gc_wb(p0)
        gc_store(p0, 8, p242, 8)
        i263 = call_may_force_i(ConstClass(fppiiptr), p219, p244, i241, 0, descr=fppii_calldescr)
        guard_not_forced() [p0, p249, p244, i263, p219]
        guard_no_exception() [p0, p249, p244, i263, p219]
        i265 = int_lt(i263, 0)
        guard_true(i265) [p0, p249, p244, i263, p219]
        finish(i263)
        """
        self.interpret(ops, [0], run=False)
        # the moves are:
        # 5 arguments
        # 1 result
        # 1 because lifetime of i172 does not end at the int_xor
        # 1 ptr to save before call
        # 3 for argument shuffling

        # XXX there is an additional mov, find out why!
        if not WIN64:
            assert len(self.filter_log_moves()) == 12
        else:
            # on Win64 we get:
            # 5 arguments (including the constant 0)
            # 1 result
            # 1 because lifetime of i172 does not end at the int_xor
            # 2 ptrs to save before call (p249, p244)
            # 1 for argument shuffling (ecx => edx)
            assert len(self.filter_log_moves()) == 10