File: common_ofi.h

package info (click to toggle)
openmpi 5.0.8-4
  • links: PTS, VCS
  • area: main
  • in suites:
  • size: 201,684 kB
  • sloc: ansic: 613,078; makefile: 42,353; sh: 11,194; javascript: 9,244; f90: 7,052; java: 6,404; perl: 5,179; python: 1,859; lex: 740; fortran: 61; cpp: 20; tcl: 12
file content (228 lines) | stat: -rw-r--r-- 8,442 bytes parent folder | download | duplicates (5)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
 * Copyright (c) 2015      Intel, Inc. All rights reserved.
 * Copyright (c) 2017      Los Alamos National Security, LLC.  All rights
 *                         reserved.
 * Copyright (c) 2020-2024 Triad National Security, LLC. All rights
 *                         reserved.
 * Copyright (c) 2021      Amazon.com, Inc. or its affiliates. All rights
 *                         reserved.
 *
 * $COPYRIGHT$
 *
 * Additional copyrights may follow
 *
 * $HEADER$
 */

#ifndef OPAL_MCA_COMMON_OFI_H
#define OPAL_MCA_COMMON_OFI_H

#include "opal/util/proc.h"
#include "opal/memoryhooks/memory.h"

#include <rdma/fabric.h>
#include <rdma/fi_cm.h>

BEGIN_C_DECLS

/**
 * State of the common OFI module.  A single instance, opal_common_ofi,
 * is declared in this header.
 */
typedef struct opal_common_ofi_module {
    /* NOTE(review): presumably a NULL-terminated list of provider names to
     * include -- confirm against the code that fills it in. */
    char **prov_include;
    /* NOTE(review): presumably a NULL-terminated list of provider names to
     * exclude -- confirm against the code that fills it in. */
    char **prov_exclude;
    /* Initialized by opal_common_ofi_mca_register().
     * NOTE(review): presumably an output stream id -- confirm. */
    int output;
} opal_common_ofi_module_t;

/**
 * Run an OFI operation, retrying while it reports resource exhaustion.
 *
 * FUNC is evaluated and its result stored in RETURN.  While the result is
 * -FI_EAGAIN, opal_progress() is called (giving pending Completion Queue
 * events a chance to be processed so that additional operations can be
 * enqueued) and FUNC is evaluated again.  Any other result -- 0 or a
 * different error code -- ends the loop and is left in RETURN for the
 * caller to inspect.  The return value of opal_progress() is not consulted.
 *
 * @param FUNC   (IN)  Expression performing the OFI call; it is
 *                     re-evaluated on every retry.
 * @param RETURN (OUT) Lvalue receiving the result of the last evaluation
 *                     of FUNC.  It is expanded several times, so it must
 *                     not have side effects.
 *
 * The expansion is a single statement WITHOUT a trailing semicolon; write
 * "OFI_RETRY_UNTIL_DONE(f, rc);" at the call site.  This keeps the macro
 * usable in an unbraced if/else arm.
 */
#define OFI_RETRY_UNTIL_DONE(FUNC, RETURN)                 \
    do {                                                   \
        (RETURN) = (FUNC);                                 \
        if (OPAL_LIKELY(0 == (RETURN))) {                  \
            break;                                         \
        }                                                  \
        if (OPAL_LIKELY(-FI_EAGAIN == (RETURN))) {         \
            opal_progress();                               \
        }                                                  \
    } while (OPAL_LIKELY(-FI_EAGAIN == (RETURN)))


/* Shared common OFI module state; the object itself is defined outside this header. */
extern opal_common_ofi_module_t opal_common_ofi;

/**
 * Common MCA registration
 *
 * Common MCA registration handling.  After calling this function,
 * \c opal_common_ofi.output will be properly initialized.
 *
 * @param component (IN) OFI component being initialized
 *
 * @returns OPAL_SUCCESS on success, OPAL error code on failure
 */
OPAL_DECLSPEC int opal_common_ofi_mca_register(const mca_base_component_t *component);

/**
 * Initializes common objects for libfabric
 *
 * Initialize the common libfabric interface.  This should be called from
 * any other OFI component's component_open() call.
 *
 * @returns NOTE(review): the return value is not documented in this
 * header; presumably OPAL_SUCCESS on success and an OPAL error code on
 * failure -- confirm against the implementation.
 *
 * @note This function is not thread safe and must be called in a
 * serial portion of the code.
 */
OPAL_DECLSPEC int opal_common_ofi_open(void);

/**
 * Cleans up common objects for libfabric
 *
 * Clean up the common libfabric interface.  This should be called from
 * any other OFI component's component_close() call.  Resource cleanup
 * is reference counted.
 *
 * NOTE(review): the original sentence here was truncated and referred to
 * opal_common_ofi_init(), which is not declared in this header.
 * Presumably every successful call to opal_common_ofi_open() should be
 * balanced by one call to this function -- confirm against the
 * implementation.
 *
 * @returns NOTE(review): the return value is not documented in this
 * header; presumably OPAL_SUCCESS or an OPAL error code -- confirm.
 *
 * @note This function is not thread safe and must be called in a
 * serial portion of the code.
 */
OPAL_DECLSPEC int opal_common_ofi_close(void);

/**
 * Export our memory hooks into the Libfabric monitor
 *
 * Use Open MPI's memory hooks to provide monitor notifications to
 * Libfabric via the external mr_cache facility.  This must be called
 * before any domain is initialized (i.e., before any Libfabric memory
 * monitor is configured).
 *
 * @returns A Libfabric error code on error.
 * NOTE(review): the success value is not stated here; presumably 0 --
 * confirm against the implementation.
 */
OPAL_DECLSPEC int opal_common_ofi_export_memory_monitor(void);

/**
 * Search function for provider names
 *
 * This function takes a provider name string and a list of lower
 * provider name strings as inputs.  It returns true if the lower
 * provider in the item string matches a lower provider in the list.
 *
 * @param list (IN)    List of strings corresponding to lower providers.
 * @param item (IN)    Single string corresponding to a provider.
 *
 * @return 0           The lower provider of the item string is not in
 *                     the list, or an input was NULL.
 * @return 1           The lower provider of the item string matches
 *                     a string in the list.
 *
 */
OPAL_DECLSPEC int opal_common_ofi_is_in_list(char **list, char *item);

/**
 * Get the number of providers whose names are included in a list
 *
 * This function takes a list of providers and a list of name strings
 * as inputs, and returns the number of providers whose names are included
 * in the name strings.
 *
 * @param provider_list (IN)    List of providers
 * @param list          (IN)    List of name strings
 *
 * @return                      Number of matched providers
 *
 */
OPAL_DECLSPEC int opal_common_ofi_count_providers_in_list(struct fi_info *provider_list,
                                                          char **list);

/**
 * Determine whether all providers are included in a list
 *
 * This function takes a list of providers and a list of name strings
 * as inputs, and returns whether all provider names are included in the
 * name strings.
 *
 * @param provider_list (IN)    List of providers
 * @param list          (IN)    List of name strings
 *
 * @return  0                   At least one provider's name is not included
 *                              in the name strings.
 * @return  1                   All provider names are included in the name
 *                              strings.
 *
 */
OPAL_DECLSPEC int opal_common_ofi_providers_subset_of_list(struct fi_info *provider_list,
                                                           char **list);

/**
 * Selects NIC (provider) based on hardware locality
 *
 * The selection is based on the following priority:
 *
 * Single-NIC:
 *
 *      If only 1 provider is available, always return that provider.
 *
 * Multi-NIC:
 *
 *      1. If the process is NOT bound, pick a NIC using (local rank % number
 *      of providers of the same type). This gives a fair chance to each
 *      qualified NIC and balances overall utilization.
 *
 *      2. If the process is bound, we compare providers in the list that have
 *      the same type as the first provider, and find the provider with the
 *      shortest distance to the current process.
 *
 *          i. If the provider has PCI BDF data, we attempt to compute the
 *          distance between the NIC and the current process cpuset. The NIC
 *          with the shortest distance is returned.
 *
 *              * For equidistant NICs, we select a NIC in round-robin fashion
 *              using the package rank of the current process, i.e. (package
 *              rank % number of providers with the same distance).
 *
 *          ii. If we cannot compute the distance between the NIC and the
 *          current process, e.g. PCI BDF data is not available, a NIC will be
 *          selected in a round-robin fashion using package rank, i.e. (package
 *          rank % number of providers of the same type).
 *
 * @param[in]   provider_list   struct fi_info* An initially selected
 *                              provider NIC. The provider name and
 *                              attributes are used to restrict NIC
 *                              selection. This provider is returned if the
 *                              NIC selection fails.
 *
 * @param[in]   process_info    opal_process_info_t* The current process info
 *
 * @return                      struct fi_info* object with the selected
 *                              provider if the selection succeeds; if the
 *                              selection fails, the fi_info object that was
 *                              initially provided.
 *
 * All errors should be recoverable and will return the initially provided
 * provider. However, if an error occurs we can no longer guarantee
 * that the provider returned is local to the process or that the processes will
 * balance across available NICs.
 *
 */
OPAL_DECLSPEC struct fi_info *opal_common_ofi_select_provider(struct fi_info *provider_list,
                                                              opal_process_info_t *process_info);

/**
 * Obtain EP endpoint name
 *
 * Obtain the EP endpoint name and length for the supplied endpoint fid.
 *
 * @param fid (IN)      fid of (S)EP endpoint
 * @param addr (OUT)    set to an allocated buffer containing the endpoint name
 * @param addrlen (OUT) length of the allocated buffer in bytes
 *
 * @return              OPAL_SUCCESS or OPAL error code
 *
 * The caller is responsible for freeing the buffer allocated to
 * contain the endpoint name.
 * NOTE(review): presumably released with free(); the state of *addr and
 * *addrlen on failure is not documented here -- confirm against the
 * implementation.
 *
 */
OPAL_DECLSPEC int opal_common_ofi_fi_getname(fid_t fid, void **addr, size_t *addrlen);

END_C_DECLS

#endif /* OPAL_MCA_COMMON_OFI_H */