File: callbuilder.py

#!/usr/bin/env python

from rpython.jit.backend.llsupport import llerrno
from rpython.jit.backend.llsupport.callbuilder import AbstractCallBuilder
from rpython.jit.backend.llsupport.jump import remap_frame_layout
from rpython.jit.backend.riscv import registers as r
from rpython.jit.backend.riscv.arch import (
    ABI_STACK_ALIGN, FLEN, INST_SIZE, XLEN)
from rpython.jit.backend.riscv.codebuilder import OverwritingBuilder
from rpython.jit.backend.riscv.instructions import (
    AMO_ACQUIRE, AMO_RELEASE)
from rpython.jit.metainterp.history import FLOAT
from rpython.rlib.objectmodel import we_are_translated
from rpython.rtyper.lltypesystem import rffi


class RISCVCallBuilder(AbstractCallBuilder):
    def __init__(self, assembler, fnloc, arglocs, resloc, restype, ressize):
        AbstractCallBuilder.__init__(self, assembler, fnloc, arglocs,
                                     resloc, restype, ressize)
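        # `current_sp` tracks how many bytes prepare_arguments() reserved on
        # the stack for stack-passed arguments, so that sp-relative offsets
        # can be fixed up and restore_stack_pointer() can release the space.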
        self.current_sp = 0

    def prepare_arguments(self):
        arglocs = self.arglocs

        non_float_locs = []
        non_float_regs = []
        float_locs = []
        float_regs = []
        stack_locs = []

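        # Integer and FP argument registers per the RISC-V calling convention
        # (a0-a7 are x10-x17, fa0-fa7 are f10-f17), listed in reverse so that
        # pop() hands them out in ABI order starting from a0/fa0.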
        free_regs = [r.x17, r.x16, r.x15, r.x14, r.x13, r.x12, r.x11, r.x10]
        free_float_regs = [r.f17, r.f16, r.f15, r.f14, r.f13, r.f12, r.f11,
                           r.f10]

        # Collect argument registers.
        stack_adj_offset = 0
        for arg in arglocs:
            if arg.type == FLOAT:
                if free_float_regs:
                    float_locs.append(arg)
                    float_regs.append(free_float_regs.pop())
                elif free_regs:
                    # If float registers are exhausted but integer registers
                    # are still available, use integer registers.
                    non_float_locs.append(arg)
                    non_float_regs.append(free_regs.pop())
                else:
                    stack_adj_offset += FLEN
                    stack_locs.append(arg)
            else:
                if free_regs:
                    non_float_locs.append(arg)
                    non_float_regs.append(free_regs.pop())
                else:
                    stack_adj_offset += XLEN
                    stack_locs.append(arg)

        if stack_locs:
            # Adjust the stack pointer.
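            # Round the outgoing argument area up to ABI_STACK_ALIGN so that
            # `sp` keeps the alignment required by the RISC-V psABI.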
            stack_adj_offset = ((stack_adj_offset + ABI_STACK_ALIGN - 1)
                                    // ABI_STACK_ALIGN * ABI_STACK_ALIGN)
            assert stack_adj_offset <= 2**11, 'too many arguments'
            self.mc.ADDI(r.sp.value, r.sp.value, -stack_adj_offset)
            self.current_sp = stack_adj_offset

            # Spill the stack-passed argument values into the area just
            # reserved.
            sp_offset = 0
            for loc in stack_locs:
                self.asm.mov_loc_to_raw_stack(loc, sp_offset)
                sp_offset += FLEN if loc.type == FLOAT else XLEN

        # Assign the callee function address to the `ra` register.
        #
        # Note: In the RISC-V backend, the `ra` (`x1`) register is not an
        # allocatable register, thus it is preserved between
        # `remap_frame_layout` calls.
        if self.fnloc.is_core_reg():
            self.mc.MV(r.ra.value, self.fnloc.value)
        elif self.fnloc.is_imm():
            self.mc.load_int_imm(r.ra.value, self.fnloc.value)
        else:
            assert self.fnloc.is_stack()
            self.mc.load_int(r.ra.value, r.jfp.value, self.fnloc.value)
        self.fnloc = r.ra

        # Move argument values into the argument registers.
        scratch_core_reg = r.x31
        scratch_fp_reg = r.f31

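        # remap_frame_layout() performs a parallel move: every source location
        # is shuffled into its destination register, with the scratch register
        # used to break cycles.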
        remap_frame_layout(self.asm, non_float_locs, non_float_regs,
                           scratch_core_reg)
        if float_locs:
            remap_frame_layout(self.asm, float_locs, float_regs,
                               scratch_fp_reg)

    def push_gcmap(self):
        noregs = self.asm.cpu.gc_ll_descr.is_shadow_stack()
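        # x10 (a0) will be overwritten by the call result, so it is listed
        # here to be left out of the gcmap.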
        gcmap = self.asm._regalloc.get_gcmap([r.x10], noregs=noregs)
        self.asm.push_gcmap(self.mc, gcmap)

    def pop_gcmap(self):
        scratch_reg = r.x12  # caller-saved scratch reg other than ra, x31
        self.asm._reload_frame_if_necessary(self.mc, tmplocs=[scratch_reg])
        self.asm.pop_gcmap(self.mc)

    def emit_raw_call(self):
        assert self.fnloc is r.ra
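        # `fnloc` is `ra`, so this JALR both jumps to the callee address and
        # leaves the return address in `ra`, as the calling convention expects.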
        self.mc.JALR(self.fnloc.value, self.fnloc.value, 0)

    def restore_stack_pointer(self):
        if self.current_sp == 0:
            return
        self.mc.ADDI(r.sp.value, r.sp.value, self.current_sp)
        self.current_sp = 0

    def load_result(self):
        resloc = self.resloc
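        # 'S' (singlefloat) and 'L' (longlong) return kinds are not supported
        # by this backend yet.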
        if self.restype == 'S':
            assert False, 'unimplemented'
        elif self.restype == 'L':
            assert False, 'unimplemented'
        if resloc is not None and resloc.is_core_reg():
            self._ensure_result_bit_extension(resloc, self.ressize,
                                              self.ressign)

    def _ensure_result_bit_extension(self, resloc, size, signed):
        if size == XLEN:
            return
        assert XLEN == 8, 'implementation below assumes 64-bit backend'
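        # Sign- or zero-extend the low `size` bytes into the full 64-bit
        # register by shifting left and then shifting right arithmetically
        # (SRAI) or logically (SRLI).  E.g. for a signed 1-byte result 0xFF:
        # SLLI by 56 gives 0xFF00...00, SRAI by 56 gives 0xFFFF...FF (-1).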
        if size == 4:
            if signed:
                self.mc.SLLI(resloc.value, resloc.value, 32)
                self.mc.SRAI(resloc.value, resloc.value, 32)
            else:
                self.mc.SLLI(resloc.value, resloc.value, 32)
                self.mc.SRLI(resloc.value, resloc.value, 32)
        elif size == 2:
            if signed:
                self.mc.SLLI(resloc.value, resloc.value, 48)
                self.mc.SRAI(resloc.value, resloc.value, 48)
            else:
                self.mc.SLLI(resloc.value, resloc.value, 48)
                self.mc.SRLI(resloc.value, resloc.value, 48)
        elif size == 1:
            if not signed:
                self.mc.ANDI(resloc.value, resloc.value, 0xFF)
            else:
                self.mc.SLLI(resloc.value, resloc.value, 56)
                self.mc.SRAI(resloc.value, resloc.value, 56)

    def call_releasegil_addr_and_move_real_arguments(self, fastgil):
        assert self.is_call_release_gil
        assert not self.asm._is_asmgcc()

        # `r.thread_id` holds our thread identifier.
        # `r.shadow_old` holds the old value of the shadow stack pointer, which
        # we save here for later comparison.

        scratch_reg = r.x31

        gcrootmap = self.asm.cpu.gc_ll_descr.gcrootmap
        if gcrootmap:
            rst = gcrootmap.get_root_stack_top_addr()
            self.mc.load_int_imm(scratch_reg.value, rst)
            self.mc.load_int(r.shadow_old.value, scratch_reg.value, 0)

        # Change `rpy_fastgil` to 0 (it should be non-zero right now) and save
        # the old value of `rpy_fastgil` into `r.thread_id`.
        self.mc.load_int_imm(scratch_reg.value, fastgil)
        self.mc.load_int(r.thread_id.value, scratch_reg.value, 0)

        # atomic_store_int(0, &rpy_fastgil, mo_release)
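        #
        # The swap discards the old value (rd = x0), so it acts as a plain
        # atomic release store of zero.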
        self.mc.atomic_swap_int(r.x0.value, r.x0.value, scratch_reg.value,
                                AMO_RELEASE)

        if not we_are_translated():
            # For testing, we should not access the jfp register any more.
            self.mc.ADDI(r.jfp.value, r.jfp.value, 1)

    def write_real_errno(self, save_err):
        # Use caller-saved registers as scratch registers.
        #
        # Note: Skip x10-x17 registers because they contain the arguments to
        # the future call.
        tls_reg = r.x31
        addr_reg = r.x30
        scratch_reg = r.x29

        if save_err & rffi.RFFI_READSAVED_ERRNO:
            # Just before a call, read `*_errno` and write it into the real
            # `errno`.

            if save_err & rffi.RFFI_ALT_ERRNO:
                rpy_errno = llerrno.get_alt_errno_offset(self.asm.cpu)
            else:
                rpy_errno = llerrno.get_rpy_errno_offset(self.asm.cpu)

            p_errno = llerrno.get_p_errno_offset(self.asm.cpu)

            # TODO: Replace saved_threadlocal_addr with the RISC-V `tp`
            # (thread pointer) register.
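            #
            # `saved_threadlocal_addr` is an sp-relative offset; add
            # `current_sp` because prepare_arguments() may have just moved
            # `sp` down to make room for stack-passed arguments.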
            self.mc.load_int(tls_reg.value, r.sp.value,
                             self.asm.saved_threadlocal_addr + self.current_sp)
            self.mc.load_int_from_base_plus_offset(addr_reg.value,
                                                   tls_reg.value, p_errno)
            self.mc.load_rffi_int_from_base_plus_offset(scratch_reg.value,
                                                        tls_reg.value,
                                                        rpy_errno)
            self.mc.store_rffi_int(scratch_reg.value, addr_reg.value, 0)
        elif save_err & rffi.RFFI_ZERO_ERRNO_BEFORE:
            # Same, but write zero.
            p_errno = llerrno.get_p_errno_offset(self.asm.cpu)
            self.mc.load_int(tls_reg.value, r.sp.value,
                             self.asm.saved_threadlocal_addr + self.current_sp)
            self.mc.load_int_from_base_plus_offset(addr_reg.value,
                                                   tls_reg.value, p_errno)
            self.mc.store_rffi_int(r.x0.value, addr_reg.value, 0)

    def read_real_errno(self, save_err):
        if save_err & rffi.RFFI_SAVE_ERRNO:
            # Just after a call, read the real `errno` and save a copy of
            # it inside our thread-local `*_errno`.

            # Use caller-saved registers as scratch registers.
            tls_reg = r.x30
            scratch_reg = r.x31
            scratch2_reg = r.x29

            if save_err & rffi.RFFI_ALT_ERRNO:
                rpy_errno = llerrno.get_alt_errno_offset(self.asm.cpu)
            else:
                rpy_errno = llerrno.get_rpy_errno_offset(self.asm.cpu)

            p_errno = llerrno.get_p_errno_offset(self.asm.cpu)

            self.mc.load_int(tls_reg.value, r.sp.value,
                             self.asm.saved_threadlocal_addr)
            self.mc.load_int_from_base_plus_offset(scratch_reg.value,
                                                   tls_reg.value, p_errno)
            self.mc.load_rffi_int(scratch_reg.value, scratch_reg.value, 0)
            self.mc.store_rffi_int_to_base_plus_offset(scratch_reg.value,
                                                       tls_reg.value,
                                                       rpy_errno,
                                                       tmp=scratch2_reg.value)

    def move_real_result_and_call_reacqgil_addr(self, fastgil):
        # Try to reacquire the lock. The following two values are saved across
        # the call and are still alive now:
        #
        # r.thread_id   # our thread ident
        # r.shadow_old  # old value of the shadowstack pointer

        # Scratch registers (these must be caller-saved registers)
        scratch_reg = r.x31
        rpy_fastgil_adr_reg = r.x30
        old_fastgil_reg = r.x29

        # Load the address of rpy_fastgil.
        self.mc.load_int_imm(rpy_fastgil_adr_reg.value, fastgil)

        # Compare-and-swap rpy_fastgil:
        #
        # atomic_compare_exchange_strong(old=0, new=r.thread_id,
        #                                addr=&rpy_fastgil)
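        #
        # This is an LR/SC retry loop.  Each instruction is INST_SIZE (4)
        # bytes: the forward BNEZ (+12) skips the SC and the retry branch when
        # the lock is already held, and the backward BNEZ (-12) restarts from
        # the LR if the SC fails spuriously.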
        self.mc.load_reserve_int(old_fastgil_reg.value,
                                 rpy_fastgil_adr_reg.value,
                                 AMO_ACQUIRE | AMO_RELEASE)
        self.mc.BNEZ(old_fastgil_reg.value, 12)
        self.mc.store_conditional_int(scratch_reg.value, r.thread_id.value,
                                      rpy_fastgil_adr_reg.value,
                                      AMO_ACQUIRE | AMO_RELEASE)
        self.mc.BNEZ(scratch_reg.value, -12)  # Re-try for spurious SC failure.

        # Now `old_fastgil_reg` holds the old value of the lock, and if
        # `old_fastgil_reg == 0`, the lock now contains `r.thread_id`.

        # Patch Location:
        # - boehm: `BEQZ old_fastgil_reg, end`
        # - shadowstack: `BNEZ old_fastgil_reg, reacqgil_slowpath`
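        #
        # EBREAK is a 4-byte placeholder; it is overwritten with the real
        # conditional branch below (via OverwritingBuilder) once the target
        # offset is known.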
        b1_location = self.mc.get_relative_pos()
        self.mc.EBREAK()

        gcrootmap = self.asm.cpu.gc_ll_descr.gcrootmap
        if gcrootmap:
            # When doing a call_release_gil with shadowstack, there is a risk
            # that `rpy_fastgil` was free but the current shadowstack belongs
            # to a different thread.  So here we check whether the shadowstack
            # pointer is still the same as it was before we released the GIL
            # (saved in `r.shadow_old`), and if not, we fall back to
            # `reacqgil_addr`.
            rst = gcrootmap.get_root_stack_top_addr()
            self.mc.load_int_imm(scratch_reg.value, rst)
            self.mc.load_int(scratch_reg.value, scratch_reg.value, 0)

            # Patch Location: `BEQ scratch_reg, r.shadow_old, end`
            b3_location = self.mc.get_relative_pos()
            self.mc.EBREAK()

            # Revert the rpy_fastgil acquired above, so that the general
            # `self.asm.reacqgil_addr` below can acquire it again.
            #
            # atomic_store_int(0, &rpy_fastgil, mo_release)
            self.mc.atomic_swap_int(r.x0.value, r.x0.value,
                                    rpy_fastgil_adr_reg.value, AMO_RELEASE)

            # Patch the b1_location above.
            pmc = OverwritingBuilder(self.mc, b1_location, INST_SIZE)
            pmc.BNEZ(old_fastgil_reg.value,
                     self.mc.get_relative_pos() - b1_location)

            open_location = b3_location
        else:
            open_location = b1_location

        # LABEL[reacqgil_slowpath]:
        #
        # Save the result value across `reacqgil`.
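        #
        # `r.thread_id` is no longer needed after the CAS above and, unlike
        # a0/fa0, it survives the call to `reacqgil_addr`, so the result is
        # parked there.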
        saved_res = r.thread_id  # Reuse `r.thread_id` to hold the result
        reg = self.resloc
        if reg is not None:
            if reg.is_core_reg():
                self.mc.MV(saved_res.value, reg.value)
            elif reg.is_fp_reg():
                assert XLEN == FLEN
                self.mc.FMV_X_D(saved_res.value, reg.value)

        # Call the `reacqgil` function.
        self.mc.load_int_imm(r.ra.value, self.asm.reacqgil_addr)
        self.mc.JALR(r.ra.value, r.ra.value, 0)

        # Restore the saved result value.
        if reg is not None:
            if reg.is_core_reg():
                self.mc.MV(reg.value, saved_res.value)
            elif reg.is_fp_reg():
                assert XLEN == FLEN
                self.mc.FMV_D_X(reg.value, saved_res.value)

        # LABEL[end]:
        #
        # Patch the `open_location` jump above:
        pmc = OverwritingBuilder(self.mc, open_location, INST_SIZE)
        offset = self.mc.get_relative_pos() - open_location
        if gcrootmap:
            pmc.BEQ(scratch_reg.value, r.shadow_old.value, offset)
        else:
            pmc.BEQZ(old_fastgil_reg.value, offset)

        if not we_are_translated():
            # For testing, now we can access the jfp register again.
            self.mc.ADDI(r.jfp.value, r.jfp.value, -1)