File: test_x86vector.py

package info (click to toggle)
pypy3 7.3.19%2Bdfsg-2
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 212,236 kB
  • sloc: python: 2,098,316; ansic: 540,565; sh: 21,462; asm: 14,419; cpp: 4,451; makefile: 4,209; objc: 761; xml: 530; exp: 499; javascript: 314; pascal: 244; lisp: 45; csh: 12; awk: 4
file content (164 lines) | stat: -rw-r--r-- 6,092 bytes parent folder | download | duplicates (5)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
import py
from rpython.jit.backend.x86.regloc import *
from rpython.jit.backend.llsupport.regalloc import Lifetime
from rpython.jit.backend.x86.regalloc import (RegAlloc,
        X86FrameManager, X86XMMRegisterManager, X86RegisterManager)
from rpython.jit.backend.x86.vector_ext import TempVector
from rpython.jit.backend.x86.test import test_basic
from rpython.jit.backend.x86.test.test_assembler import \
        (TestRegallocPushPop as BaseTestAssembler)
from rpython.jit.metainterp.test import test_zvector
from rpython.rtyper.lltypesystem import lltype
from rpython.jit.backend.detect_cpu import getcpuclass

class TestBasic(test_basic.Jit386Mixin, test_zvector.VectorizeTests):
    """Run the shared vectorization tests on the x86 backend.

    The individual tests are inherited from test_zvector.VectorizeTests
    (module rpython.jit.metainterp.test.test_zvector).
    """

    enable_opts = 'intbounds:rewrite:virtualize:string:earlyforce:pure:heap:unroll'

    def setup_method(self, method):
        """Replace self.CPUClass by a factory that patches each new CPU."""
        real_cpu_class = self.CPUClass

        def patched_cpu_factory(*args, **kwargs):
            instance = real_cpu_class(*args, **kwargs)
            # > 95% of the tests can be executed, so cheat a little here
            instance.supports_guard_gc_type = True
            return instance

        self.CPUClass = patched_cpu_factory

    def supports_vector_ext(self):
        """Return the ``vector_extension`` attribute of self.CPUClass."""
        # NOTE(review): once setup_method has run, self.CPUClass is the plain
        # factory function defined there, which carries no vector_extension
        # attribute of its own -- confirm when this method is actually called.
        return self.CPUClass.vector_extension

    def test_list_vectorize(self):
        """Disabled for now: needs support_guard_gc_type."""

@py.test.fixture
def regalloc(request):
    """Fixture that builds a RegAlloc around a freshly set-up assembler.

    Nothing is returned: the assembler and the register allocator are stored
    on the requesting test class as ``asm`` and ``regalloc``.
    """
    from rpython.jit.backend.x86.regalloc import (X86FrameManager,
            X86XMMRegisterManager)

    class FakeToken:
        # minimal stand-in for a loop token, handed to asm.setup() below
        class compiled_loop_token:
            asmmemmgr_blocks = None

    cpu_class = getcpuclass()
    cpu = cpu_class(None, None)
    cpu.setup()
    if cpu.HAS_CODEMAP:
        cpu.codemap.setup()

    token = FakeToken()
    asm = cpu.assembler
    asm.setup_once()
    asm.setup(token)

    # one frame manager is shared by both register managers
    allocator = RegAlloc(asm)
    frame_manager = X86FrameManager(cpu.get_baseofs_of_frame_field())
    allocator.fm = frame_manager
    allocator.rm = X86RegisterManager({}, frame_manager=frame_manager,
                                      assembler=asm)
    allocator.xrm = X86XMMRegisterManager({}, frame_manager=frame_manager,
                                          assembler=asm)

    request.cls.asm = asm
    request.cls.regalloc = allocator



class TestAssembler(BaseTestAssembler):
    """Assembler-level tests for SSE vector instructions and for
    RegAlloc.enforce_var_in_vector_reg on x86.

    The machine-code tests pass a callback to ``do_test``, which is inherited
    from BaseTestAssembler and not visible in this file; presumably it
    assembles what the callback emits, executes it and returns the value
    left in eax -- confirm against test_assembler.py.

    The ``test_enforce_var*`` tests request the ``regalloc`` fixture only for
    its side effect: it stores ``asm`` and ``regalloc`` on the test class.
    """

    def imm_4_int32(self, a, b, c, d):
        """Write four ints into a newly allocated data block.

        Returns the address of the block, which holds ``a, b, c, d`` as
        consecutive rffi.INT values.

        NOTE(review): reads ``self.xrm``, which this class never assigns;
        presumably the inherited ``do_test`` sets it before invoking the
        callback -- confirm.
        """
        # presumably (size, alignment): 16 bytes, 16-byte aligned, i.e. one
        # XMM register's worth of data -- confirm against malloc_aligned
        adr = self.xrm.assembler.datablockwrapper.malloc_aligned(16, 16)
        ptr = rffi.cast(rffi.CArrayPtr(rffi.INT), adr)
        ptr[0] = rffi.r_int(a)
        ptr[1] = rffi.r_int(b)
        ptr[2] = rffi.r_int(c)
        ptr[3] = rffi.r_int(d)
        return adr

    def test_simple_4_int_load_sum_x86_64(self):
        """Quadruple a packed int vector and sum its two lowest lanes.

        Only runs when the machine word is 8 bytes.
        """
        def callback(asm):
            if asm.mc.WORD != 8:
                py.test.skip("requires a 64-bit backend (WORD == 8)")
            adr = self.imm_4_int32(123, 543, 0, 0)
            asm.mc.MOV_ri(r8.value, adr)
            # xmm7 = v; xmm7 += v; xmm7 += xmm7  ==>  every lane is 4 * v[i]
            asm.mc.MOVDQU_xm(xmm7.value, (r8.value, 0))
            asm.mc.PADDD_xm(xmm7.value, (r8.value, 0))
            asm.mc.PADDD_xx(xmm7.value, xmm7.value)

            # mask that keeps only the low 32 bits of a 64-bit register
            asm.mc.MOV_ri(edx.value, 0x00000000ffffffff)

            # eax accumulates the sum, starting with lane 0
            asm.mc.MOV_ri(eax.value, 0)
            asm.mc.MOVDQ_rx(ecx.value, xmm7.value)
            asm.mc.AND_rr(ecx.value, edx.value)
            asm.mc.ADD(eax, ecx)

            # shift the vector right by 4 bytes so lane 1 becomes the lowest
            asm.mc.PSRLDQ_xi(xmm7.value, 4)
            asm.mc.MOVDQ_rx(ecx.value, xmm7.value)
            asm.mc.AND_rr(ecx.value, edx.value)
            asm.mc.ADD(eax, ecx)
        res = self.do_test(callback)
        assert res == 123*4 + 543*4

    def test_vector_store(self):
        """Double a vector, store it and reload it with MOVDQU."""
        def callback(asm):
            addr = self.imm_4_int32(11, 12, 13, 14)
            asm.mov(ImmedLoc(addr), ecx)
            asm.mc.MOVDQU_xm(xmm6.value, (ecx.value, 0))
            asm.mc.PADDD_xm(xmm6.value, (ecx.value, 0))
            # write the doubled vector back, then read it again from memory
            asm.mc.MOVDQU(AddressLoc(ecx, ImmedLoc(0)), xmm6)
            asm.mc.MOVDQU(xmm6, AddressLoc(ecx, ImmedLoc(0)))
            asm.mc.MOVDQ_rx(eax.value, xmm6.value)

        # only the lowest 32-bit lane is checked: 11 + 11
        res = self.do_test(callback) & 0xffffffff
        assert res == 22

    def test_vector_store_aligned(self):
        """Same as test_vector_store, but using MOVDQA for load and store."""
        def callback(asm):
            addr = self.imm_4_int32(11, 12, 13, 14)
            asm.mov(ImmedLoc(addr), ecx)
            asm.mc.MOVDQA(xmm6, AddressLoc(ecx, ImmedLoc(0)))
            asm.mc.PADDD_xm(xmm6.value, (ecx.value, 0))
            # write the doubled vector back, then read it again from memory
            asm.mc.MOVDQA(AddressLoc(ecx, ImmedLoc(0)), xmm6)
            asm.mc.MOVDQA(xmm6, AddressLoc(ecx, ImmedLoc(0)))
            asm.mc.MOVDQ_rx(eax.value, xmm6.value)

        # only the lowest 32-bit lane is checked: 11 + 11
        res = self.do_test(callback) & 0xffffffff
        assert res == 22

    def test_enforce_var(self, regalloc):
        """A variable that only has a frame binding can be enforced in xmm0."""
        arg = TempVector('f')
        args = []
        self.regalloc.fm.bindings[arg] = FrameLoc(0, 64, 'f')
        reg = self.regalloc.enforce_var_in_vector_reg(arg, args, xmm0)
        assert reg is xmm0

    def test_enforce_var_xmm0_forbidden(self, regalloc):
        """Enforcing xmm0 while another variable holds it moves that variable.

        ``arg1`` is bound to xmm0 and passed in ``args``; afterwards ``arg``
        must own xmm0 and ``arg1`` must be bound to a different register.
        """
        arg = TempVector('f')
        arg1 = TempVector('f')
        args = [arg1]
        xrm = self.regalloc.xrm
        xrm.reg_bindings[arg1] = xmm0
        # xmm0 is taken by arg1, so drop it from the free list
        fr = xrm.free_regs
        xrm.free_regs = [r for r in fr if r is not xmm0]
        self.regalloc.fm.bindings[arg] = FrameLoc(0, 64, 'f')
        reg = self.regalloc.enforce_var_in_vector_reg(arg, args, xmm0)
        assert reg is xmm0
        assert len(xrm.reg_bindings) == 2
        assert xrm.reg_bindings[arg] == xmm0
        assert xrm.reg_bindings[arg1] != xmm0

    def test_enforce_var_spill(self, regalloc):
        """Enforcing xmm0 with no free register evicts another variable.

        ``arg1`` and ``arg2`` start in xmm0 and xmm1 and the free list is
        empty; afterwards ``arg`` owns xmm0, ``arg1`` sits in xmm1 and
        ``arg2`` no longer has a register binding.
        """
        arg = TempVector('f')
        arg1 = TempVector('f')
        arg2 = TempVector('f')
        args = []
        xrm = self.regalloc.xrm
        xrm.reg_bindings[arg1] = xmm0
        xrm.reg_bindings[arg2] = xmm1
        xrm.longevity[arg1] = Lifetime(0, 1)
        xrm.longevity[arg2] = Lifetime(0, 2)
        xrm.longevity[arg] = Lifetime(0, 3)
        # no register is free, so the allocator has to make room itself
        xrm.free_regs = []
        self.regalloc.fm.bindings[arg] = FrameLoc(0, 64, 'f')
        self.regalloc.fm.bindings[arg2] = FrameLoc(0, 72, 'f')
        reg = self.regalloc.enforce_var_in_vector_reg(arg, args, xmm0)
        assert reg is xmm0
        assert len(xrm.reg_bindings) == 2
        assert xrm.reg_bindings[arg] == xmm0
        assert xrm.reg_bindings[arg1] == xmm1
        assert arg2 not in xrm.reg_bindings