File: btl_uct.h

package info (click to toggle)
openmpi 5.0.7-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, trixie
  • size: 202,312 kB
  • sloc: ansic: 612,441; makefile: 42,495; sh: 11,230; javascript: 9,244; f90: 7,052; java: 6,404; perl: 5,154; python: 1,856; lex: 740; fortran: 61; cpp: 20; tcl: 12
file content (340 lines) | stat: -rw-r--r-- 13,154 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
 * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
 *                         University Research and Technology
 *                         Corporation.  All rights reserved.
 * Copyright (c) 2004-2009 The University of Tennessee and The University
 *                         of Tennessee Research Foundation.  All rights
 *                         reserved.
 * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
 *                         University of Stuttgart.  All rights reserved.
 * Copyright (c) 2004-2005 The Regents of the University of California.
 *                         All rights reserved.
 * Copyright (c) 2015-2018 Los Alamos National Security, LLC. All rights
 *                         reserved.
 * Copyright (c) 2019      Google, LLC. All rights reserved.
 * Copyright (c) 2019      Intel, Inc.  All rights reserved.
 * Copyright (c) 2020      Amazon.com, Inc. or its affiliates.
 *                         All Rights reserved.
 * $COPYRIGHT$
 *
 * Additional copyrights may follow
 *
 * $HEADER$
 */
/**
 * @file
 */
#ifndef MCA_BTL_UCT_H
#define MCA_BTL_UCT_H

#include "opal_config.h"
#include <string.h>
#include <sys/types.h>

/* Open MPI includes */
#include "opal/class/opal_fifo.h"
#include "opal/class/opal_hash_table.h"
#include "opal/mca/btl/base/base.h"
#include "opal/mca/btl/base/btl_base_error.h"
#include "opal/mca/mpool/mpool.h"
#include "opal/mca/pmix/pmix-internal.h"
#include "opal/mca/rcache/base/base.h"
#include "opal/mca/threads/tsd.h"
#include "opal/util/event.h"
#include <uct/api/uct.h>

#include "btl_uct_types.h"

BEGIN_C_DECLS

/* detection for old vs new atomic flags */
#if defined(UCT_IFACE_FLAG_ATOMIC_ADD32)
#    define OPAL_HAVE_UCT_EP_ATOMIC64_POST 0
#else
#    define OPAL_HAVE_UCT_EP_ATOMIC64_POST 1
#endif

/**
 * @brief UCT BTL module
 */
struct mca_btl_uct_module_t {
    /** base BTL interface */
    mca_btl_base_module_t super;

    /** whether the module has been fully initialized or not */
    bool initialized;

    /** lock for the hash table */
    opal_mutex_t endpoint_lock;

    /** endpoint hash table */
    opal_hash_table_t id_to_endpoint;

    /** mutex to protect the module */
    opal_recursive_mutex_t lock;

    /** async context */
    ucs_async_context_t *ucs_async;

    /** transport for active messaging */
    mca_btl_uct_tl_t *am_tl;

    /** transport for RDMA/AMOs */
    mca_btl_uct_tl_t *rdma_tl;

    /** transport for forming connections (if needed) */
    mca_btl_uct_tl_t *conn_tl;

    /** array containing the am_tl and rdma_tl */
    mca_btl_uct_tl_t *comm_tls[2];

#if UCT_API >= UCT_VERSION(1, 7)
    uct_component_h uct_component;
#endif

    /** registration cache */
    mca_rcache_base_module_t *rcache;

    /** name of the memory domain backing this module */
    char *md_name;

    /** am and rdma share endpoints */
    bool shared_endpoints;

    /** memory domain */
    mca_btl_uct_md_t *md;

    /** un-registered frags that will be used with uct_ep_am_short() */
    opal_free_list_t short_frags;

    /** registered frags that will be used with uct_ep_am_zcopy() */
    opal_free_list_t eager_frags;

    /** large registered frags for packing non-contiguous data */
    opal_free_list_t max_frags;

    /** frags that were waiting on connections that are now ready to send */
    opal_list_t pending_frags;

    /** pending connection requests */
    opal_fifo_t pending_connection_reqs;
};
typedef struct mca_btl_uct_module_t mca_btl_uct_module_t;

extern mca_btl_uct_module_t mca_btl_uct_module_template;

/**
 * @brief UCT BTL component
 */
struct mca_btl_uct_component_t {
    /** base BTL component */
    mca_btl_base_component_3_0_0_t super;

    /** number of TL modules */
    int module_count;

    /** All BTL UCT modules (1 per memory domain) */
    mca_btl_uct_module_t *modules[MCA_BTL_UCT_MAX_MODULES];

    /** allowed UCT memory domains */
    char *memory_domains;

    /** allowed transports */
    char *allowed_transports;

    /** number of worker contexts to create */
    int num_contexts_per_module;

#if OPAL_C_HAVE__THREAD_LOCAL
    /** bind threads to contexts */
    bool bind_threads_to_contexts;
#endif

    /** disable UCX memory hooks */
    bool disable_ucx_memory_hooks;
};
typedef struct mca_btl_uct_component_t mca_btl_uct_component_t;

OPAL_DECLSPEC extern mca_btl_uct_component_t mca_btl_uct_component;

struct mca_btl_base_registration_handle_t {
    /** The packed memory handle. The size of this field is defined by UCT. */
    uint8_t packed_handle[1];
};

struct mca_btl_uct_reg_t {
    mca_rcache_base_registration_t base;

    /** UCT memory handle */
    uct_mem_h uct_memh;

    /** remote handle */
    mca_btl_base_registration_handle_t handle;
};
typedef struct mca_btl_uct_reg_t mca_btl_uct_reg_t;

OBJ_CLASS_DECLARATION(mca_btl_uct_reg_t);

#define MCA_BTL_UCT_REG_REMOTE_TO_LOCAL(reg) \
    ((mca_btl_uct_reg_t *) ((intptr_t)(reg) -offsetof(mca_btl_uct_reg_t, handle)))

/**
 * Initiate an asynchronous put.
 * Completion Semantics: if this function returns a 1 then the operation
 *                       is complete. a return of OPAL_SUCCESS indicates
 *                       the put operation has been queued with the
 *                       network. the local_handle can not be deregistered
 *                       until all outstanding operations on that handle
 *                       have been completed.
 *
 * @param btl (IN)            BTL module
 * @param endpoint (IN)       BTL addressing information
 * @param local_address (IN)  Local address to put from (registered)
 * @param remote_address (IN) Remote address to put to (registered remotely)
 * @param local_handle (IN)   Registration handle for region containing
 *                            (local_address, local_address + size)
 * @param remote_handle (IN)  Remote registration handle for region containing
 *                            (remote_address, remote_address + size)
 * @param size (IN)           Number of bytes to put
 * @param flags (IN)          Flags for this put operation
 * @param order (IN)          Ordering
 * @param cbfunc (IN)         Function to call on completion (if queued)
 * @param cbcontext (IN)      Context for the callback
 * @param cbdata (IN)         Data for callback
 *
 * @retval OPAL_SUCCESS    The descriptor was successfully queued for a put
 * @retval OPAL_ERROR      The descriptor was NOT successfully queued for a put
 * @retval OPAL_ERR_OUT_OF_RESOURCE  Insufficient resources to queue the put
 *                         operation. Try again later
 * @retval OPAL_ERR_NOT_AVAILABLE  Put can not be performed due to size or
 *                         alignment restrictions.
 */
int mca_btl_uct_put(struct mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t *endpoint,
                    void *local_address, uint64_t remote_address,
                    struct mca_btl_base_registration_handle_t *local_handle,
                    struct mca_btl_base_registration_handle_t *remote_handle, size_t size,
                    int flags, int order, mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext,
                    void *cbdata);

/**
 * Initiate an asynchronous get.
 * Completion Semantics: if this function returns a 1 then the operation
 *                       is complete. a return of OPAL_SUCCESS indicates
 *                       the get operation has been queued with the
 *                       network. the local_handle can not be deregistered
 *                       until all outstanding operations on that handle
 *                       have been completed.
 *
 * @param btl (IN)            BTL module
 * @param endpoint (IN)       BTL addressing information
 * @param local_address (IN)  Local address to put from (registered)
 * @param remote_address (IN) Remote address to put to (registered remotely)
 * @param local_handle (IN)   Registration handle for region containing
 *                            (local_address, local_address + size)
 * @param remote_handle (IN)  Remote registration handle for region containing
 *                            (remote_address, remote_address + size)
 * @param size (IN)           Number of bytes to put
 * @param flags (IN)          Flags for this put operation
 * @param order (IN)          Ordering
 * @param cbfunc (IN)         Function to call on completion (if queued)
 * @param cbcontext (IN)      Context for the callback
 * @param cbdata (IN)         Data for callback
 *
 * @retval OPAL_SUCCESS    The descriptor was successfully queued for a put
 * @retval OPAL_ERROR      The descriptor was NOT successfully queued for a put
 * @retval OPAL_ERR_OUT_OF_RESOURCE  Insufficient resources to queue the put
 *                         operation. Try again later
 * @retval OPAL_ERR_NOT_AVAILABLE  Put can not be performed due to size or
 *                         alignment restrictions.
 */
int mca_btl_uct_get(struct mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t *endpoint,
                    void *local_address, uint64_t remote_address,
                    struct mca_btl_base_registration_handle_t *local_handle,
                    struct mca_btl_base_registration_handle_t *remote_handle, size_t size,
                    int flags, int order, mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext,
                    void *cbdata);

int mca_btl_uct_aop(struct mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t *endpoint,
                    uint64_t remote_address, mca_btl_base_registration_handle_t *remote_handle,
                    mca_btl_base_atomic_op_t op, uint64_t operand, int flags, int order,
                    mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, void *cbdata);

int mca_btl_uct_afop(struct mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t *endpoint,
                     void *local_address, uint64_t remote_address,
                     mca_btl_base_registration_handle_t *local_handle,
                     mca_btl_base_registration_handle_t *remote_handle, mca_btl_base_atomic_op_t op,
                     uint64_t operand, int flags, int order,
                     mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, void *cbdata);

int mca_btl_uct_acswap(struct mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t *endpoint,
                       void *local_address, uint64_t remote_address,
                       mca_btl_base_registration_handle_t *local_handle,
                       mca_btl_base_registration_handle_t *remote_handle, uint64_t compare,
                       uint64_t value, int flags, int order,
                       mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, void *cbdata);

int mca_btl_uct_flush(struct mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t *endpoint);
int mca_btl_uct_flush_thread(mca_btl_base_module_t *btl);

int mca_btl_uct_finalize(mca_btl_base_module_t *btl);

int mca_btl_uct_reg_mem(void *reg_data, void *base, size_t size,
                        mca_rcache_base_registration_t *reg);
int mca_btl_uct_dereg_mem(void *reg_data, mca_rcache_base_registration_t *reg);

ucs_status_t mca_btl_uct_am_handler(void *arg, void *data, size_t length, unsigned flags);

struct mca_btl_base_endpoint_t *mca_btl_uct_get_ep(struct mca_btl_base_module_t *module,
                                                   opal_proc_t *proc);

int mca_btl_uct_query_tls(mca_btl_uct_module_t *module, mca_btl_uct_md_t *md,
                          uct_tl_resource_desc_t *tl_descs, unsigned tl_count);
int mca_btl_uct_process_connection_request(mca_btl_uct_module_t *module,
                                           mca_btl_uct_conn_req_t *req);

/**
 * @brief Checks if a tl is suitable for using for RDMA
 *
 * @param[in] tl  btl/uct tl pointer
 */
static inline bool mca_btl_uct_tl_supports_rdma(mca_btl_uct_tl_t *tl)
{
    return (MCA_BTL_UCT_TL_ATTR(tl, 0).cap.flags
            & (UCT_IFACE_FLAG_PUT_ZCOPY | UCT_IFACE_FLAG_GET_ZCOPY))
           == (UCT_IFACE_FLAG_PUT_ZCOPY | UCT_IFACE_FLAG_GET_ZCOPY);
}

/**
 * @brief Checks if a tl is suitable for using for active messaging
 */
static inline bool mca_btl_uct_tl_support_am(mca_btl_uct_tl_t *tl)
{
    return (MCA_BTL_UCT_TL_ATTR(tl, 0).cap.flags
            & (UCT_IFACE_FLAG_AM_SHORT | UCT_IFACE_FLAG_AM_BCOPY | UCT_IFACE_FLAG_AM_ZCOPY));
}

/**
 * @brief Checks if a tl can be used for passing data to connect endpoints
 *
 * @param[in] tl  btl/uct tl pointer
 */
static inline bool mca_btl_uct_tl_supports_conn(mca_btl_uct_tl_t *tl)
{
    return (MCA_BTL_UCT_TL_ATTR(tl, 0).cap.flags
            & (UCT_IFACE_FLAG_AM_SHORT | UCT_IFACE_FLAG_CONNECT_TO_IFACE))
           == (UCT_IFACE_FLAG_AM_SHORT | UCT_IFACE_FLAG_CONNECT_TO_IFACE);
}

/**
 * @brief Check if tl endpoints need to be connected via a connection tl
 *
 * @param[in] tl  btl/uct tl pointer
 */
static inline bool mca_btl_uct_tl_requires_connection_tl(mca_btl_uct_tl_t *tl)
{
    return !(MCA_BTL_UCT_TL_ATTR(tl, 0).cap.flags & UCT_IFACE_FLAG_CONNECT_TO_IFACE);
}

END_C_DECLS
#endif