File: btl_uct_amo.c

package info (click to toggle)
openmpi 5.0.7-1
  • links: PTS, VCS
  • area: main
  • in suites: trixie
  • size: 202,312 kB
  • sloc: ansic: 612,441; makefile: 42,495; sh: 11,230; javascript: 9,244; f90: 7,052; java: 6,404; perl: 5,154; python: 1,856; lex: 740; fortran: 61; cpp: 20; tcl: 12
file content (194 lines) | stat: -rw-r--r-- 7,885 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
 * Copyright (c) 2014-2018 Los Alamos National Security, LLC. All rights
 *                         reserved.
 * $COPYRIGHT$
 *
 * Additional copyrights may follow
 *
 * $HEADER$
 */

#include "btl_uct_device_context.h"

#if OPAL_HAVE_UCT_EP_ATOMIC64_POST
/* Translation table from BTL atomic opcodes to UCT atomic opcodes.
 *
 * Every populated entry holds the UCT opcode *plus one*. UCT_ATOMIC_OP_ADD == 0, so
 * without the bias a supported ADD would be indistinguishable from an entry that was
 * never initialized (and is therefore zero). Users of the table must treat 0 as
 * "operation not supported" and subtract 1 from any other value to obtain the real
 * UCT opcode. This avoids having to fill in the table completely.
 *
 * The table is only ever read, so it is declared const. */
static const int mca_btl_uct_btl_to_uct_atomic[MCA_BTL_ATOMIC_LAST] = {
    [MCA_BTL_ATOMIC_ADD] = UCT_ATOMIC_OP_ADD + 1,
    [MCA_BTL_ATOMIC_AND] = UCT_ATOMIC_OP_AND + 1,
    [MCA_BTL_ATOMIC_OR] = UCT_ATOMIC_OP_OR + 1,
    [MCA_BTL_ATOMIC_XOR] = UCT_ATOMIC_OP_XOR + 1,
    [MCA_BTL_ATOMIC_SWAP] = UCT_ATOMIC_OP_SWAP + 1,
};
#endif

/**
 * Post an atomic fetch-and-op on remote memory through UCT.
 *
 * @param btl            BTL module; cast to the UCT module type.
 * @param endpoint       BTL endpoint identifying the peer.
 * @param local_address  Buffer that receives the fetched value (32 or 64 bits
 *                       depending on MCA_BTL_ATOMIC_FLAG_32BIT in @p flags).
 * @param remote_address Address of the target operand on the peer.
 * @param local_handle   Registration handle stored in the completion object (only
 *                       used when @p cbfunc is non-NULL).
 * @param remote_handle  Remote registration handle used to obtain the UCT rkey.
 * @param op             BTL atomic opcode. With OPAL_HAVE_UCT_EP_ATOMIC64_POST any
 *                       opcode present in mca_btl_uct_btl_to_uct_atomic is accepted;
 *                       otherwise only MCA_BTL_ATOMIC_ADD and MCA_BTL_ATOMIC_SWAP.
 * @param operand        Operand of the atomic operation.
 * @param flags          MCA_BTL_ATOMIC_FLAG_32BIT selects the 32-bit variant.
 * @param order          Not used by this implementation.
 * @param cbfunc         Completion callback, may be NULL. When NULL no completion
 *                       object is allocated and UCT is given a NULL completion.
 * @param cbcontext      Stored in the completion object for @p cbfunc.
 * @param cbdata         Stored in the completion object for @p cbfunc.
 *
 * @return OPAL_SUCCESS if UCT reported the operation as in progress,
 *         1 if UCT reported immediate completion (the completion object is released
 *         here in that case),
 *         OPAL_ERR_BAD_PARAM for an unsupported @p op,
 *         OPAL_ERR_OUT_OF_RESOURCE if the completion object could not be allocated or
 *         UCT returned any other status,
 *         or the error returned by mca_btl_uct_get_rkey().
 */
int mca_btl_uct_afop(struct mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t *endpoint,
                     void *local_address, uint64_t remote_address,
                     mca_btl_base_registration_handle_t *local_handle,
                     mca_btl_base_registration_handle_t *remote_handle, mca_btl_base_atomic_op_t op,
                     uint64_t operand, int flags, int order,
                     mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, void *cbdata)
{
    mca_btl_uct_module_t *uct_btl = (mca_btl_uct_module_t *) btl;
    mca_btl_uct_device_context_t *context = mca_btl_uct_module_get_rdma_context(uct_btl);
    mca_btl_uct_uct_completion_t *comp = NULL;
    ucs_status_t ucs_status;
    uct_rkey_bundle_t rkey;
    uct_ep_h ep_handle;
    int rc;

#if OPAL_HAVE_UCT_EP_ATOMIC64_POST
    /* table entries are biased by one; 0 marks an opcode with no UCT equivalent */
    int uct_op = mca_btl_uct_btl_to_uct_atomic[op];

    /* the post-decrement removes the bias once the "unsupported" check has passed */
    if (OPAL_UNLIKELY(0 == uct_op--)) {
        return OPAL_ERR_BAD_PARAM;
    }
#else
    /* the legacy UCT API used below only provides fetch-add and swap */
    if (OPAL_UNLIKELY(MCA_BTL_ATOMIC_ADD != op && MCA_BTL_ATOMIC_SWAP != op)) {
        return OPAL_ERR_BAD_PARAM;
    }
#endif

    /* a completion object is only needed when the caller wants a callback */
    if (cbfunc) {
        comp = mca_btl_uct_uct_completion_alloc(uct_btl, endpoint, local_address, local_handle,
                                                context, cbfunc, cbcontext, cbdata);
        if (OPAL_UNLIKELY(NULL == comp)) {
            return OPAL_ERR_OUT_OF_RESOURCE;
        }
    }

    rc = mca_btl_uct_get_rkey(uct_btl, context, endpoint, remote_handle, &rkey, &ep_handle);
    if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
        mca_btl_uct_uct_completion_release(comp);
        return rc;
    }

    mca_btl_uct_context_lock(context);

    /* NTB: comp is NULL when no callback was requested. taking &comp->uct_comp in that
     * case is undefined behavior and would not necessarily yield NULL, so the completion
     * argument is selected explicitly at every call site. */
#if OPAL_HAVE_UCT_EP_ATOMIC64_POST
    if (flags & MCA_BTL_ATOMIC_FLAG_32BIT) {
        ucs_status = uct_ep_atomic32_fetch(ep_handle, uct_op, operand, (uint32_t *) local_address,
                                           remote_address, rkey.rkey,
                                           comp ? &comp->uct_comp : NULL);
    } else {
        ucs_status = uct_ep_atomic64_fetch(ep_handle, uct_op, operand, (uint64_t *) local_address,
                                           remote_address, rkey.rkey,
                                           comp ? &comp->uct_comp : NULL);
    }
#else
    if (MCA_BTL_ATOMIC_ADD == op) {
        if (flags & MCA_BTL_ATOMIC_FLAG_32BIT) {
            ucs_status = uct_ep_atomic_fadd32(ep_handle, (uint32_t) operand, remote_address,
                                              rkey.rkey, (uint32_t *) local_address,
                                              comp ? &comp->uct_comp : NULL);
        } else {
            ucs_status = uct_ep_atomic_fadd64(ep_handle, operand, remote_address, rkey.rkey,
                                              (uint64_t *) local_address,
                                              comp ? &comp->uct_comp : NULL);
        }
    } else {
        if (flags & MCA_BTL_ATOMIC_FLAG_32BIT) {
            ucs_status = uct_ep_atomic_swap32(ep_handle, (uint32_t) operand, remote_address,
                                              rkey.rkey, (uint32_t *) local_address,
                                              comp ? &comp->uct_comp : NULL);
        } else {
            ucs_status = uct_ep_atomic_swap64(ep_handle, operand, remote_address, rkey.rkey,
                                              (uint64_t *) local_address,
                                              comp ? &comp->uct_comp : NULL);
        }
    }
#endif

    /* go ahead and progress the worker while we have the lock */
    (void) uct_worker_progress(context->uct_worker);

    mca_btl_uct_context_unlock(context);

    mca_btl_uct_device_handle_completions(context);

    if (UCS_INPROGRESS == ucs_status) {
        /* the completion object (if any) stays alive for the pending operation */
        rc = OPAL_SUCCESS;
    } else if (UCS_OK == ucs_status) {
        /* completed immediately: report that with 1 and drop the unused completion */
        rc = 1;
        mca_btl_uct_uct_completion_release(comp);
    } else {
        /* every other UCT status is reported as a resource error */
        rc = OPAL_ERR_OUT_OF_RESOURCE;
        mca_btl_uct_uct_completion_release(comp);
    }

    mca_btl_uct_rkey_release(uct_btl, &rkey);

    return rc;
}

/**
 * Post a non-fetching atomic operation on remote memory.
 *
 * Implemented by forwarding to mca_btl_uct_afop() with a throw-away result buffer and
 * a NULL local registration handle; all other arguments are passed through unchanged
 * and the return value is whatever mca_btl_uct_afop() returns.
 */
int mca_btl_uct_aop(struct mca_btl_base_module_t *btl, mca_btl_base_endpoint_t *endpoint,
                    uint64_t remote_address, mca_btl_base_registration_handle_t *remote_handle,
                    mca_btl_base_atomic_op_t op, uint64_t operand, int flags, int order,
                    mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, void *cbdata)
{
    /* sink for the fetched value. it has static storage so the address stays valid
     * after this function has returned; the value written to it is never read. */
    static uint64_t discarded_fetch;
    int rc;

    /* the fetching variant is used for now: a non-fetching op may be faster on some
     * platforms, but this was quicker to implement and it guarantees remote
     * completion. */
    rc = mca_btl_uct_afop(btl, endpoint, &discarded_fetch, remote_address, NULL, remote_handle,
                          op, operand, flags, order, cbfunc, cbcontext, cbdata);

    return rc;
}

/**
 * Post an atomic compare-and-swap on remote memory through UCT.
 *
 * @param btl            BTL module; cast to the UCT module type.
 * @param endpoint       BTL endpoint identifying the peer.
 * @param local_address  Buffer that receives the result of the operation (32 or 64
 *                       bits depending on MCA_BTL_ATOMIC_FLAG_32BIT in @p flags).
 * @param remote_address Address of the target on the peer.
 * @param local_handle   Registration handle stored in the completion object (only
 *                       used when @p cbfunc is non-NULL).
 * @param remote_handle  Remote registration handle used to obtain the UCT rkey.
 * @param compare        Compare value handed to UCT (truncated to 32 bits for the
 *                       32-bit variant).
 * @param value          Swap value handed to UCT (truncated to 32 bits for the 32-bit
 *                       variant).
 * @param flags          MCA_BTL_ATOMIC_FLAG_32BIT selects the 32-bit variant.
 * @param order          Not used by this implementation.
 * @param cbfunc         Completion callback, may be NULL. When NULL no completion
 *                       object is allocated and UCT is given a NULL completion.
 * @param cbcontext      Stored in the completion object for @p cbfunc.
 * @param cbdata         Stored in the completion object for @p cbfunc.
 *
 * @return OPAL_SUCCESS if UCT reported the operation as in progress,
 *         1 if UCT reported immediate completion (the completion object is released
 *         here in that case),
 *         OPAL_ERR_OUT_OF_RESOURCE if the completion object could not be allocated or
 *         UCT returned any other status,
 *         or the error returned by mca_btl_uct_get_rkey().
 */
int mca_btl_uct_acswap(struct mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t *endpoint,
                       void *local_address, uint64_t remote_address,
                       mca_btl_base_registration_handle_t *local_handle,
                       mca_btl_base_registration_handle_t *remote_handle, uint64_t compare,
                       uint64_t value, int flags, int order,
                       mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, void *cbdata)
{
    mca_btl_uct_module_t *uct_btl = (mca_btl_uct_module_t *) btl;
    mca_btl_uct_device_context_t *context = mca_btl_uct_module_get_rdma_context(uct_btl);
    mca_btl_uct_uct_completion_t *comp = NULL;
    ucs_status_t ucs_status;
    uct_rkey_bundle_t rkey;
    uct_ep_h ep_handle;
    int rc;

    /* a completion object is only needed when the caller wants a callback */
    if (cbfunc) {
        comp = mca_btl_uct_uct_completion_alloc(uct_btl, endpoint, local_address, local_handle,
                                                context, cbfunc, cbcontext, cbdata);
        if (OPAL_UNLIKELY(NULL == comp)) {
            return OPAL_ERR_OUT_OF_RESOURCE;
        }
    }

    rc = mca_btl_uct_get_rkey(uct_btl, context, endpoint, remote_handle, &rkey, &ep_handle);
    if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
        mca_btl_uct_uct_completion_release(comp);
        return rc;
    }

    mca_btl_uct_context_lock(context);

    /* NTB: comp is NULL when no callback was requested. taking &comp->uct_comp in that
     * case is undefined behavior and would not necessarily yield NULL, so the completion
     * argument is selected explicitly at every call site. */
    if (flags & MCA_BTL_ATOMIC_FLAG_32BIT) {
        ucs_status = uct_ep_atomic_cswap32(ep_handle, (uint32_t) compare, (uint32_t) value,
                                           remote_address, rkey.rkey, (uint32_t *) local_address,
                                           comp ? &comp->uct_comp : NULL);
    } else {
        ucs_status = uct_ep_atomic_cswap64(ep_handle, compare, value, remote_address, rkey.rkey,
                                           (uint64_t *) local_address,
                                           comp ? &comp->uct_comp : NULL);
    }

    /* go ahead and progress the worker while we have the lock */
    (void) uct_worker_progress(context->uct_worker);

    mca_btl_uct_context_unlock(context);

    mca_btl_uct_device_handle_completions(context);

    if (UCS_INPROGRESS == ucs_status) {
        /* the completion object (if any) stays alive for the pending operation */
        rc = OPAL_SUCCESS;
    } else if (UCS_OK == ucs_status) {
        /* completed immediately: report that with 1 and drop the unused completion */
        rc = 1;
        mca_btl_uct_uct_completion_release(comp);
    } else {
        /* every other UCT status is reported as a resource error */
        rc = OPAL_ERR_OUT_OF_RESOURCE;
        mca_btl_uct_uct_completion_release(comp);
    }

    mca_btl_uct_rkey_release(uct_btl, &rkey);

    return rc;
}