File: opal_free_list.c

package info (click to toggle)
openmpi 5.0.8-4
  • links: PTS, VCS
  • area: main
  • in suites:
  • size: 201,684 kB
  • sloc: ansic: 613,078; makefile: 42,353; sh: 11,194; javascript: 9,244; f90: 7,052; java: 6,404; perl: 5,179; python: 1,859; lex: 740; fortran: 61; cpp: 20; tcl: 12
file content (321 lines) | stat: -rw-r--r-- 11,712 bytes parent folder | download | duplicates (4)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
 * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
 *                         University Research and Technology
 *                         Corporation.  All rights reserved.
 * Copyright (c) 2004-2009 The University of Tennessee and The University
 *                         of Tennessee Research Foundation.  All rights
 *                         reserved.
 * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
 *                         University of Stuttgart.  All rights reserved.
 * Copyright (c) 2004-2005 The Regents of the University of California.
 *                         All rights reserved.
 * Copyright (c) 2006-2007 Mellanox Technologies. All rights reserved.
 * Copyright (c) 2010-2013 Cisco Systems, Inc.  All rights reserved.
 * Copyright (c) 2011      NVIDIA Corporation.  All rights reserved.
 * Copyright (c) 2012-2018 Los Alamos National Security, LLC. All rights
 *                         reserved.
 * $COPYRIGHT$
 *
 * Additional copyrights may follow
 *
 * $HEADER$
 */

#include "opal_config.h"

#include "opal/align.h"
#include "opal/class/opal_free_list.h"
#include "opal/mca/mpool/base/base.h"
#include "opal/mca/mpool/mpool.h"
#include "opal/mca/rcache/rcache.h"
#include "opal/util/output.h"
#include "opal/util/sys_limits.h"

/* Slab bookkeeping reuses the free list item layout: opal_free_list_grow_st()
 * constructs the head of every allocation as an opal_free_list_item_t, links
 * it into fl_allocations, and stores the slab's payload buffer and rcache
 * registration in its ptr / registration fields. */
typedef struct opal_free_list_item_t opal_free_list_memory_t;

/* Class instance for opal_free_list_item_t deriving from opal_list_item_t.
 * NOTE(review): the two NULL arguments are presumably the (absent)
 * constructor and destructor -- confirm against OBJ_CLASS_INSTANCE. */
OBJ_CLASS_INSTANCE(opal_free_list_item_t, opal_list_item_t, NULL, NULL);

/**
 * Constructor for opal_free_list_t.
 *
 * Builds the embedded lock, condition variable and slab list, and gives every
 * configurable field a neutral default. The list is not usable for growing
 * until opal_free_list_init() has supplied the real sizes and alignments.
 *
 * @param fl  free list being constructed
 */
static void opal_free_list_construct(opal_free_list_t *fl)
{
    OBJ_CONSTRUCT(&fl->fl_lock, opal_mutex_t);
    OBJ_CONSTRUCT(&fl->fl_condition, opal_condition_t);
    fl->fl_max_to_alloc = 0;
    fl->fl_num_allocated = 0;
    fl->fl_num_per_alloc = 0;
    fl->fl_num_waiting = 0;
    fl->fl_frag_size = sizeof(opal_free_list_item_t);
    fl->fl_frag_alignment = 0;
    fl->fl_payload_buffer_size = 0;
    fl->fl_payload_buffer_alignment = 0;
    fl->fl_frag_class = OBJ_CLASS(opal_free_list_item_t);
    fl->fl_mpool = NULL;
    fl->fl_rcache = NULL;
    /* default flags */
    fl->fl_rcache_reg_flags = MCA_RCACHE_FLAGS_CACHE_BYPASS | MCA_RCACHE_FLAGS_ACCELERATOR_REGISTER_MEM;
    /* no per-item init callback until opal_free_list_init() installs one;
     * opal_free_list_grow_st() tests this pointer, so it must never be left
     * indeterminate */
    fl->item_init = NULL;
    fl->ctx = NULL;
    OBJ_CONSTRUCT(&(fl->fl_allocations), opal_list_t);
}

/**
 * Release one slab previously created by opal_free_list_grow_st().
 *
 * Deregisters the payload buffer from the rcache (if it was registered),
 * returns the payload buffer to the mpool (or to free() when the list has no
 * mpool), then destructs and frees the slab header itself.
 *
 * @param fl      free list that owns the slab
 * @param fl_mem  slab header; must already be unlinked from fl_allocations.
 *                The pointer is invalid after this call.
 */
static void opal_free_list_allocation_release(opal_free_list_t *fl, opal_free_list_memory_t *fl_mem)
{
    /* a slab grown without a payload buffer has no registration even when the
     * list has an rcache, so only hand real registrations back to it */
    if (NULL != fl->fl_rcache && NULL != fl_mem->registration) {
        fl->fl_rcache->rcache_deregister(fl->fl_rcache, fl_mem->registration);
    }

    /* likewise, only slabs with a payload buffer have anything to free here */
    if (NULL != fl_mem->ptr) {
        if (NULL != fl->fl_mpool) {
            fl->fl_mpool->mpool_free(fl->fl_mpool, fl_mem->ptr);
        } else {
            free(fl_mem->ptr);
        }
    }

    /* destruct the item (we constructed it), then free the memory chunk */
    OBJ_DESTRUCT(fl_mem);
    free(fl_mem);
}

/**
 * Destructor for opal_free_list_t.
 *
 * Destructs every item still sitting on the LIFO, releases every slab
 * recorded in fl_allocations, and finally tears down the members that the
 * constructor built.
 *
 * @param fl  free list being destroyed
 */
static void opal_free_list_destruct(opal_free_list_t *fl)
{
    opal_list_item_t *entry;
    opal_free_list_item_t *frag;

#if 0 && OPAL_ENABLE_DEBUG
    if(opal_list_get_size(&fl->super) != fl->fl_num_allocated) {
        opal_output(0, "opal_free_list: %d allocated %d returned: %s:%d\n",
            fl->fl_num_allocated, opal_list_get_size(&fl->super),
            fl->super.super.cls_init_file_name, fl->super.super.cls_init_lineno);
    }
#endif

    /* Items were constructed by this list, so they are destructed here. Their
     * storage is not freed individually: it lives inside a slab and goes away
     * when that slab is released below. */
    for (entry = opal_lifo_pop(&(fl->super)); NULL != entry; entry = opal_lifo_pop(&(fl->super))) {
        frag = (opal_free_list_item_t *) entry;
        OBJ_DESTRUCT(frag);
    }

    /* now give back every slab (header, payload buffer and registration) */
    for (entry = opal_list_remove_first(&fl->fl_allocations); NULL != entry;
         entry = opal_list_remove_first(&fl->fl_allocations)) {
        opal_free_list_allocation_release(fl, (opal_free_list_memory_t *) entry);
    }

    OBJ_DESTRUCT(&fl->fl_allocations);
    OBJ_DESTRUCT(&fl->fl_condition);
    OBJ_DESTRUCT(&fl->fl_lock);
}

/* Class instance for opal_free_list_t deriving from opal_lifo_t.
 * NOTE(review): the last two arguments are presumably installed as the class
 * constructor and destructor -- confirm against OBJ_CLASS_INSTANCE. */
OBJ_CLASS_INSTANCE(opal_free_list_t, opal_lifo_t, opal_free_list_construct,
                   opal_free_list_destruct);

/**
 * Configure a constructed free list and optionally pre-populate it.
 *
 * @param flist                     free list to configure
 * @param frag_size                 requested item size; raised to the size of
 *                                  frag_class when that is larger, and never
 *                                  lowers the size already stored in the list
 * @param frag_alignment            item alignment; must be a power of two >= 2
 * @param frag_class                class used to construct items, or NULL to
 *                                  keep the class already stored in the list
 * @param payload_buffer_size       per-item payload size, 0 for no payload
 * @param payload_buffer_alignment  payload alignment; checked (power of two
 *                                  >= 2) only when payload_buffer_size > 0
 * @param num_elements_to_alloc     items to create right away, 0 for none
 * @param max_elements_to_alloc     stored as the allocation limit
 * @param num_elements_per_alloc    stored as the per-grow item count
 * @param mpool                     payload allocator, or NULL to use
 *                                  mca_mpool_base_default_module
 * @param rcache_reg_flags          flags OR-ed into the list's existing
 *                                  registration flags
 * @param rcache                    registration cache, may be NULL
 * @param item_init                 optional per-item callback run by grow
 * @param ctx                       opaque pointer passed to item_init
 *
 * @return OPAL_ERROR for an invalid alignment; otherwise OPAL_SUCCESS, or the
 *         result of opal_free_list_grow_st() when initial items are requested.
 */
int opal_free_list_init(opal_free_list_t *flist, size_t frag_size, size_t frag_alignment,
                        opal_class_t *frag_class, size_t payload_buffer_size,
                        size_t payload_buffer_alignment, int num_elements_to_alloc,
                        int max_elements_to_alloc, int num_elements_per_alloc,
                        mca_mpool_base_module_t *mpool, int rcache_reg_flags,
                        mca_rcache_base_module_t *rcache, opal_free_list_item_init_fn_t item_init,
                        void *ctx)
{
    /* the fragment alignment has to be a power of two no smaller than 2 */
    if (frag_alignment < 2 || 0 != (frag_alignment & (frag_alignment - 1))) {
        return OPAL_ERROR;
    }

    /* the payload alignment only matters when there is a payload */
    if (payload_buffer_size > 0
        && (payload_buffer_alignment < 2
            || 0 != (payload_buffer_alignment & (payload_buffer_alignment - 1)))) {
        return OPAL_ERROR;
    }

    if (NULL != frag_class) {
        /* an item can never be smaller than the class that constructs it */
        if (frag_class->cls_sizeof > frag_size) {
            frag_size = frag_class->cls_sizeof;
        }
        flist->fl_frag_class = frag_class;
    }

    /* only ever enlarge the stored item size */
    if (flist->fl_frag_size < frag_size) {
        flist->fl_frag_size = frag_size;
    }

    /* item and payload geometry */
    flist->fl_frag_alignment = frag_alignment;
    flist->fl_payload_buffer_size = payload_buffer_size;
    flist->fl_payload_buffer_alignment = payload_buffer_alignment;

    /* growth policy */
    flist->fl_num_allocated = 0;
    flist->fl_max_to_alloc = max_elements_to_alloc;
    flist->fl_num_per_alloc = num_elements_per_alloc;

    /* memory sources */
    flist->fl_mpool = (NULL != mpool) ? mpool : mca_mpool_base_default_module;
    flist->fl_rcache = rcache;
    flist->fl_rcache_reg_flags |= rcache_reg_flags;

    /* per-item initialization hook */
    flist->item_init = item_init;
    flist->ctx = ctx;

    if (0 == num_elements_to_alloc) {
        return OPAL_SUCCESS;
    }

    return opal_free_list_grow_st(flist, num_elements_to_alloc, NULL);
}

/**
 * Grow the free list by one slab of items.
 *
 * A slab is a single malloc'ed block holding a header (kept on
 * fl_allocations) followed by the item structures. When the list has a
 * payload buffer size, one payload buffer for the whole slab is obtained from
 * the mpool and, if an rcache is set, registered with it. Each item is
 * constructed with the list's fragment class, optionally passed to the
 * item_init callback, and pushed onto the LIFO.
 *
 * The request is clamped to what fl_max_to_alloc still allows. With
 * MCA_RCACHE_FLAGS_ACCELERATOR_REGISTER_MEM set, the payload buffer is rounded
 * up to whole pages and the item count is raised to fill it, so a grow may
 * create more items than requested and fl_num_allocated may end up above
 * fl_max_to_alloc.
 *
 * This function takes no lock itself; opal_free_list_resize_mt() in this file
 * holds fl_lock around its calls.
 *
 * @param flist         free list to grow
 * @param num_elements  number of items requested
 * @param item_out      if non-NULL, receives the first item created instead of
 *                      that item being pushed onto the list; left untouched
 *                      when no item is created
 *
 * @return OPAL_SUCCESS if at least one item was created (item_init failing on
 *         a later item only shortens the slab);
 *         OPAL_ERR_TEMP_OUT_OF_RESOURCE if the allocation limit is reached,
 *         zero items were requested, or memory allocation failed;
 *         the rcache error code if registering the payload buffer failed;
 *         OPAL_ERR_OUT_OF_RESOURCE if item_init failed on the first item.
 */
int opal_free_list_grow_st(opal_free_list_t *flist, size_t num_elements,
                           opal_free_list_item_t **item_out)
{
    unsigned char *ptr, *payload_ptr = NULL;
    opal_free_list_memory_t *alloc_ptr;
    size_t alloc_size, head_size, elem_size = 0, buffer_size = 0, align = 0;
    mca_rcache_base_registration_t *reg = NULL;
    int rc = OPAL_SUCCESS;

    if (flist->fl_max_to_alloc) {
        const size_t max_to_alloc = flist->fl_max_to_alloc;
        const size_t num_allocated = flist->fl_num_allocated;

        /* The page rounding below can push fl_num_allocated past the limit.
         * Check for that before subtracting: "max - allocated" would wrap
         * around to a huge element count instead of going negative. */
        if (num_allocated >= max_to_alloc) {
            return OPAL_ERR_TEMP_OUT_OF_RESOURCE;
        }

        if (num_elements > max_to_alloc - num_allocated) {
            num_elements = max_to_alloc - num_allocated;
        }
    }

    if (num_elements == 0) {
        return OPAL_ERR_TEMP_OUT_OF_RESOURCE;
    }

    head_size = OPAL_ALIGN(flist->fl_frag_size, flist->fl_frag_alignment, size_t);

    /* NTH: calculate allocation alignment first as it might change the number of elements */
    if (0 != flist->fl_payload_buffer_size) {
        elem_size = OPAL_ALIGN(flist->fl_payload_buffer_size, flist->fl_payload_buffer_alignment,
                               size_t);

        /* elem_size should not be 0 here */
        assert(elem_size > 0);

        buffer_size = num_elements * elem_size;
        align = flist->fl_payload_buffer_alignment;

        if (MCA_RCACHE_FLAGS_ACCELERATOR_REGISTER_MEM & flist->fl_rcache_reg_flags) {
            size_t pagesize = opal_getpagesize();
            /* CUDA cannot handle registering overlapping regions, so make
             * sure each region is page sized and page aligned. */
            align = OPAL_ALIGN(align, pagesize, size_t);
            buffer_size = OPAL_ALIGN(buffer_size, pagesize, size_t);

            /* avoid wasting space in the buffer */
            num_elements = buffer_size / elem_size;
        }
    }

    /* calculate head allocation size: the items, the slab header, and slack
     * so the first item can be aligned after the header */
    alloc_size = num_elements * head_size + sizeof(opal_free_list_memory_t)
                 + flist->fl_frag_alignment;

    alloc_ptr = (opal_free_list_memory_t *) malloc(alloc_size);
    if (OPAL_UNLIKELY(NULL == alloc_ptr)) {
        return OPAL_ERR_TEMP_OUT_OF_RESOURCE;
    }

    if (0 != flist->fl_payload_buffer_size) {
        /* allocate the rest from the mpool (or use memalign/malloc) */
        payload_ptr = (unsigned char *) flist->fl_mpool->mpool_alloc(flist->fl_mpool, buffer_size,
                                                                     align, 0);
        if (NULL == payload_ptr) {
            free(alloc_ptr);
            return OPAL_ERR_TEMP_OUT_OF_RESOURCE;
        }

        if (flist->fl_rcache) {
            rc = flist->fl_rcache->rcache_register(flist->fl_rcache, payload_ptr,
                                                   num_elements * elem_size,
                                                   flist->fl_rcache_reg_flags,
                                                   MCA_RCACHE_ACCESS_ANY, &reg);
            if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
                free(alloc_ptr);
                flist->fl_mpool->mpool_free(flist->fl_mpool, payload_ptr);

                return rc;
            }
        }
    }

    /* make the alloc_ptr a list item, save the chunk in the allocations list,
     * and have ptr point to memory right after the list item structure */
    OBJ_CONSTRUCT(alloc_ptr, opal_free_list_item_t);
    opal_list_append(&(flist->fl_allocations), (opal_list_item_t *) alloc_ptr);

    /* the header keeps the start of the payload buffer; payload_ptr itself is
     * advanced item by item in the loop below */
    alloc_ptr->registration = reg;
    alloc_ptr->ptr = payload_ptr;

    ptr = (unsigned char *) alloc_ptr + sizeof(opal_free_list_memory_t);
    ptr = OPAL_ALIGN_PTR(ptr, flist->fl_frag_alignment, unsigned char *);

    for (size_t i = 0; i < num_elements; ++i) {
        opal_free_list_item_t *item = (opal_free_list_item_t *) ptr;
        /* set before construction, so the fragment class constructor runs
         * with these fields already filled in */
        item->registration = reg;
        item->ptr = payload_ptr;

        OBJ_CONSTRUCT_INTERNAL(item, flist->fl_frag_class);
        item->super.item_free = 0;

        /* run the initialize function if present */
        if (flist->item_init) {
            if (OPAL_SUCCESS != (rc = flist->item_init(item, flist->ctx))) {
                /* keep the items initialized so far, drop this one */
                num_elements = i;
                OBJ_DESTRUCT(item);
                break;
            }
        }

        /* NTH: in case the free list may be accessed from multiple threads
         * use the atomic lifo push. The overhead is small compared to the
         * overall overhead of opal_free_list_grow(). */
        if (item_out && 0 == i) {
            /* ensure the thread that is growing the free list always gets an item
             * if one is available */
            *item_out = item;
        } else {
            opal_lifo_push_atomic(&flist->super, &item->super);
        }

        ptr += head_size;
        if (NULL != payload_ptr) {
            payload_ptr += elem_size;
        }
    }

    if (OPAL_SUCCESS != rc && 0 == num_elements) {
        /* couldn't initialize any items */
        opal_list_remove_item(&flist->fl_allocations, (opal_list_item_t *) alloc_ptr);
        opal_free_list_allocation_release(flist, alloc_ptr);
        return OPAL_ERR_OUT_OF_RESOURCE;
    }

    flist->fl_num_allocated += num_elements;
    return OPAL_SUCCESS;
}

/**
 * Resize the free list so that it contains at least the specified number of
 * elements. The elements are not all created in one memory segment: the list
 * is grown repeatedly, fl_num_per_alloc elements at a time, until it reaches
 * the requested size or growing fails (for example because the maximum set at
 * initialization has been reached).
 *
 * Growing is done with fl_lock held.
 *
 * @param flist  free list to resize
 * @param size   minimum number of allocated elements wanted
 *
 * @return OPAL_SUCCESS if the list already holds, or was grown to hold, at
 *         least size elements; otherwise the error returned by
 *         opal_free_list_grow_st().
 */
int opal_free_list_resize_mt(opal_free_list_t *flist, size_t size)
{
    int ret = OPAL_SUCCESS;

    /* unlocked fast path: a list that already holds size elements needs no
     * growing (and must not report an error just because it is full) */
    if (flist->fl_num_allocated >= size) {
        return OPAL_SUCCESS;
    }

    opal_mutex_lock(&flist->fl_lock);
    /* re-check under the lock before every grow so that no extra slab is
     * allocated once the target has been met */
    while (flist->fl_num_allocated < size) {
        ret = opal_free_list_grow_st(flist, flist->fl_num_per_alloc, NULL);
        if (OPAL_SUCCESS != ret) {
            break;
        }
    }
    opal_mutex_unlock(&flist->fl_lock);

    return ret;
}