File: asmmemmgr.py

package info (click to toggle)
pypy 5.6.0%2Bdfsg-4
  • links: PTS, VCS
  • area: main
  • in suites: stretch
  • size: 97,040 kB
  • ctags: 185,069
  • sloc: python: 1,147,862; ansic: 49,642; cpp: 5,245; asm: 5,169; makefile: 529; sh: 481; xml: 232; lisp: 45
file content (352 lines) | stat: -rw-r--r-- 13,365 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
import sys
from rpython.rlib.rarithmetic import intmask, r_uint, LONG_BIT
from rpython.rlib.objectmodel import we_are_translated
from rpython.rlib import rmmap
from rpython.rlib.debug import debug_start, debug_print, debug_stop
from rpython.rlib.debug import have_debug_prints
from rpython.rtyper.lltypesystem import lltype, rffi


class AsmMemoryManager(object):
    LARGE_ALLOC_SIZE = 1024 * 1024   # 1MB
    MIN_FRAGMENT = 64
    NUM_INDICES = 32     # good for all sizes between 64 bytes and ~490 KB
    _allocated = None

    def __init__(self, large_alloc_size = LARGE_ALLOC_SIZE,
                       min_fragment     = MIN_FRAGMENT,
                       num_indices      = NUM_INDICES):
        self.total_memory_allocated = r_uint(0)
        self.total_mallocs = r_uint(0)
        self.large_alloc_size = large_alloc_size
        self.min_fragment = min_fragment
        self.num_indices = num_indices
        self.free_blocks = {}      # map {start: stop}
        self.free_blocks_end = {}  # map {stop: start}
        self.blocks_by_size = [[] for i in range(self.num_indices)]

    def get_stats(self):
        """Returns stats for rlib.jit.jit_hooks.stats_asmmemmgr_*()."""
        return (self.total_memory_allocated, self.total_mallocs)

    def malloc(self, minsize, maxsize):
        """Allocate executable memory, between minsize and maxsize bytes,
        and return a pair (start, stop).  Does not perform any rounding
        of minsize and maxsize.
        """
        result = self._allocate_block(minsize)
        (start, stop) = result
        smaller_stop = start + maxsize
        if smaller_stop + self.min_fragment <= stop:
            self._add_free_block(smaller_stop, stop)
            stop = smaller_stop
            result = (start, stop)
        self.total_mallocs += r_uint(stop - start)
        return result   # pair (start, stop)

    def free(self, start, stop):
        """Free a block (start, stop) returned by a previous malloc()."""
        if r_uint is not None:
            self.total_mallocs -= r_uint(stop - start)
        self._add_free_block(start, stop)

    def open_malloc(self, minsize):
        """Allocate at least minsize bytes.  Returns (start, stop)."""
        result = self._allocate_block(minsize)
        (start, stop) = result
        self.total_mallocs += r_uint(stop - start)
        return result

    def open_free(self, middle, stop):
        """Used for freeing the end of an open-allocated block of memory."""
        if stop - middle >= self.min_fragment:
            self.total_mallocs -= r_uint(stop - middle)
            self._add_free_block(middle, stop)
            return True
        else:
            return False    # too small to record

    def _allocate_large_block(self, minsize):
        # Compute 'size' from 'minsize': it must be rounded up to
        # 'large_alloc_size'.  Additionally, we use the following line
        # to limit how many mmap() requests the OS will see in total:
        minsize = max(minsize, intmask(self.total_memory_allocated >> 4))
        size = minsize + self.large_alloc_size - 1
        size = (size // self.large_alloc_size) * self.large_alloc_size
        data = rmmap.alloc(size)
        if not we_are_translated():
            if self._allocated is None:
                self._allocated = []
            self._allocated.append((data, size))
            if sys.maxint > 2147483647:
                # Hack to make sure that mcs are not within 32-bits of one
                # another for testing purposes
                rmmap.hint.pos += 0x80000000 - size
        self.total_memory_allocated += r_uint(size)
        data = rffi.cast(lltype.Signed, data)
        return self._add_free_block(data, data + size)

    def _get_index(self, length):
        i = 0
        while length > self.min_fragment:
            length = (length * 3) >> 2
            i += 1
            if i == self.num_indices - 1:
                break
        return i

    def _add_free_block(self, start, stop):
        # Merge with the block on the left
        if start in self.free_blocks_end:
            left_start = self.free_blocks_end[start]
            self._del_free_block(left_start, start)
            start = left_start
        # Merge with the block on the right
        if stop in self.free_blocks:
            right_stop = self.free_blocks[stop]
            self._del_free_block(stop, right_stop)
            stop = right_stop
        # Add it to the dicts
        self.free_blocks[start] = stop
        self.free_blocks_end[stop] = start
        i = self._get_index(stop - start)
        self.blocks_by_size[i].append(start)
        return start

    def _del_free_block(self, start, stop):
        del self.free_blocks[start]
        del self.free_blocks_end[stop]
        i = self._get_index(stop - start)
        self.blocks_by_size[i].remove(start)

    def _allocate_block(self, length):
        # First look in the group of index i0 if there is a block that is
        # big enough.  Following an idea found in the Linux malloc.c, we
        # prefer the oldest entries rather than the newest one, to let
        # them have enough time to coalesce into bigger blocks.  It makes
        # a big difference on the purely random test (30% of total usage).
        i0 = self._get_index(length)
        bbs = self.blocks_by_size[i0]
        for j in range(len(bbs)):
            start = bbs[j]
            stop = self.free_blocks[start]
            if start + length <= stop:
                del bbs[j]
                break   # found a block big enough
        else:
            # Then look in the larger groups
            i = i0 + 1
            while i < self.num_indices:
                if len(self.blocks_by_size[i]) > 0:
                    # any block found in a larger group is big enough
                    start = self.blocks_by_size[i].pop(0)
                    stop = self.free_blocks[start]
                    break
                i += 1
            else:
                # Exhausted the memory.  Allocate the resulting block.
                start = self._allocate_large_block(length)
                stop = self.free_blocks[start]
                i = self._get_index(stop - start)
                assert self.blocks_by_size[i][-1] == start
                self.blocks_by_size[i].pop()
        #
        del self.free_blocks[start]
        del self.free_blocks_end[stop]
        return (start, stop)

    def _delete(self):
        "NOT_RPYTHON"
        if self._allocated:
            for data, size in self._allocated:
                rmmap.free(data, size)
        self._allocated = None


class MachineDataBlockWrapper(object):
    def __init__(self, asmmemmgr, allblocks):
        self.asmmemmgr = asmmemmgr
        self.allblocks = allblocks
        self.rawstart    = 0
        self.rawposition = 0
        self.rawstop     = 0

    def done(self):
        if self.rawstart != 0:
            if self.asmmemmgr.open_free(self.rawposition, self.rawstop):
                self.rawstop = self.rawposition
            self.allblocks.append((self.rawstart, self.rawstop))
            self.rawstart    = 0
            self.rawposition = 0
            self.rawstop     = 0

    def _allocate_next_block(self, minsize):
        self.done()
        self.rawstart, self.rawstop = self.asmmemmgr.open_malloc(minsize)
        self.rawposition = self.rawstart

    def malloc_aligned(self, size, alignment):
        p = self.rawposition
        p = (p + alignment - 1) & (-alignment)
        if p + size > self.rawstop:
            self._allocate_next_block(size + alignment - 1)
            p = self.rawposition
            p = (p + alignment - 1) & (-alignment)
            assert p + size <= self.rawstop
        self.rawposition = p + size
        return p


class BlockBuilderMixin(object):
    _mixin_ = True
    # A base class to generate assembler.  It is equivalent to just a list
    # of chars, but it is potentially more efficient for that usage.
    # It works by allocating the assembler SUBBLOCK_SIZE bytes at a time.
    # Ideally, this number should be a power of two that fits the GC's most
    # compact allocation scheme (which is so far 35 * WORD for minimark.py).
    WORD = LONG_BIT // 8
    SUBBLOCK_SIZE = 32 * WORD
    SUBBLOCK_PTR = lltype.Ptr(lltype.GcForwardReference())
    SUBBLOCK = lltype.GcStruct('SUBBLOCK',
                   ('prev', SUBBLOCK_PTR),
                   ('data', lltype.FixedSizeArray(lltype.Char, SUBBLOCK_SIZE)))
    SUBBLOCK_PTR.TO.become(SUBBLOCK)

    ALIGN_MATERIALIZE = 16

    gcroot_markers = None

    def __init__(self, translated=None):
        if translated is None:
            translated = we_are_translated()
        if translated:
            self.init_block_builder()
        else:
            self._become_a_plain_block_builder()
        self.rawstart = 0

    def init_block_builder(self):
        self._cursubblock = lltype.nullptr(self.SUBBLOCK)
        self._baserelpos = -self.SUBBLOCK_SIZE
        self._make_new_subblock()

    def _make_new_subblock(self):
        nextsubblock = lltype.malloc(self.SUBBLOCK)
        nextsubblock.prev = self._cursubblock
        self._cursubblock = nextsubblock
        self._cursubindex = 0
        self._baserelpos += self.SUBBLOCK_SIZE
    _make_new_subblock._dont_inline_ = True

    def writechar(self, char):
        index = self._cursubindex
        if index == self.SUBBLOCK_SIZE:
            self._make_new_subblock()
            index = 0
        self._cursubblock.data[index] = char
        self._cursubindex = index + 1

    def absolute_addr(self):
        return self.rawstart

    def overwrite(self, index, char):
        assert 0 <= index < self.get_relative_pos()
        block = self._cursubblock
        index -= self._baserelpos
        while index < 0:
            block = block.prev
            index += self.SUBBLOCK_SIZE
        block.data[index] = char

    def overwrite32(self, index, val):
        self.overwrite(index, chr(val & 0xff))
        self.overwrite(index + 1, chr((val >> 8) & 0xff))
        self.overwrite(index + 2, chr((val >> 16) & 0xff))
        self.overwrite(index + 3, chr((val >> 24) & 0xff))

    def get_relative_pos(self):
        return self._baserelpos + self._cursubindex

    def copy_to_raw_memory(self, addr):
        # indirection for _become_a_plain_block_builder() and for subclasses
        self._copy_to_raw_memory(addr)

    def _copy_to_raw_memory(self, addr):
        block = self._cursubblock
        blocksize = self._cursubindex
        targetindex = self._baserelpos
        while targetindex >= 0:
            dst = rffi.cast(rffi.CCHARP, addr + targetindex)
            for j in range(blocksize):
                dst[j] = block.data[j]
            block = block.prev
            blocksize = self.SUBBLOCK_SIZE
            targetindex -= self.SUBBLOCK_SIZE
        assert not block

    def copy_core_dump(self, addr, offset=0, count=-1):
        HEX = '0123456789ABCDEF'
        dump = []
        src = rffi.cast(rffi.CCHARP, addr)
        end = self.get_relative_pos()
        if count != -1:
            end = offset + count
        for p in range(offset, end):
            o = ord(src[p])
            dump.append(HEX[o >> 4])
            dump.append(HEX[o & 15])
        return ''.join(dump)

    def _dump(self, addr, logname, backend=None):
        debug_start(logname)
        if have_debug_prints():
            #
            if backend is not None:
                debug_print('BACKEND', backend)
            #
            from rpython.jit.backend.hlinfo import highleveljitinfo
            if highleveljitinfo.sys_executable:
                debug_print('SYS_EXECUTABLE', highleveljitinfo.sys_executable)
            else:
                debug_print('SYS_EXECUTABLE', '??')
            #
            dump = self.copy_core_dump(addr)
            debug_print('CODE_DUMP',
                        '@%x' % addr,
                        '+0 ',     # backwards compatibility
                        dump)
            #
        debug_stop(logname)

    def materialize(self, cpu, allblocks, gcrootmap=None):
        size = self.get_relative_pos()
        align = self.ALIGN_MATERIALIZE
        size += align - 1
        malloced = cpu.asmmemmgr.malloc(size, size)
        allblocks.append(malloced)
        rawstart = malloced[0]
        rawstart = (rawstart + align - 1) & (-align)
        self.rawstart = rawstart
        self.copy_to_raw_memory(rawstart)
        if self.gcroot_markers is not None:
            assert gcrootmap is not None
            for pos, mark in self.gcroot_markers:
                gcrootmap.register_asm_addr(rawstart + pos, mark)
        return rawstart

    def _become_a_plain_block_builder(self):
        # hack purely for speed of tests
        self._data = []
        self.writechar = self._data.append
        self.overwrite = self._data.__setitem__
        self.get_relative_pos = self._data.__len__
        def plain_copy_to_raw_memory(addr):
            dst = rffi.cast(rffi.CCHARP, addr)
            for i, c in enumerate(self._data):
                dst[i] = c
        self._copy_to_raw_memory = plain_copy_to_raw_memory

    def insert_gcroot_marker(self, mark):
        if self.gcroot_markers is None:
            self.gcroot_markers = []
        self.gcroot_markers.append((self.get_relative_pos(), mark))