1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249
|
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2005 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2006 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2006-2015 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2015 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* Copyright (c) 2011-2020 Sandia National Laboratories. All rights reserved.
* Copyright (c) 2021 Bull S.A.S. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
/**
* @file
*
* Partitioned Communication (PART)
*
* An MCA component type that provides the partitioned interface functionality
* required by the MPI-4 specification. PART is designed to act as an
* intermediary between the MPI layer and another transfer layer. It differs
* from other components, such as PML, in that the underlying transfer
* mechanism may itself be another MPI layer, such as the osc component/the
* RMA interface.
*
* ------------------------------------
* | MPI |
* ------------------------------------
* | PART |
* ------------------------------------
* | OSC (RDMA) |
* ------------------------------------
*
* The initial implementation currently leverages the RMA interface,
* with the intent to remove the MPI layer and directly call the osc component.
* Other transport mechanisms (such as the MTL and BTL components) could be
* used in future implementations.
*
* This component and its initial module are under development and have
* more restrictions on use than are described in the MPI-4 specification.
* Currently, MPI_Psend_init and MPI_Precv_init are both blocking in the RMA
* component which requires careful use to avoid deadlocks. This will
* be addressed in future updates.
*/
#ifndef MCA_PART_H
#define MCA_PART_H
#include "ompi_config.h"
#include "ompi/mca/mca.h"
#include "mpi.h" /* needed for MPI_ANY_TAG */
#include "ompi/request/request.h"
BEGIN_C_DECLS
/*
* Forward declaration (incomplete type) of struct ompi_proc_t.
* NOTE(review): no declaration visible in this header refers to
* struct ompi_proc_t -- confirm whether it is still needed.
*/
struct ompi_proc_t;
/**
* MCA->PART Called by MCA framework to initialize the component.
*
* @param priority (OUT) Relative priority or ranking used by MCA to
* select a component.
*
* @param enable_progress_threads (IN) Whether this component is
* allowed to run a hidden/progress thread or not.
*
* @param enable_mpi_threads (IN) Whether support for multiple MPI
* threads is enabled or not (i.e., MPI_THREAD_MULTIPLE), which
* indicates whether multiple threads may invoke this component
* simultaneously or not.
*
* @return Pointer to a struct mca_part_base_module_1_0_1_t.
* NOTE(review): the meaning of a NULL return is not stated in this
* header; presumably it means the component declines selection --
* confirm against the framework's selection code.
*/
typedef struct mca_part_base_module_1_0_1_t * (*mca_part_base_component_init_fn_t)(
int *priority,
bool enable_progress_threads,
bool enable_mpi_threads);
/**
* Component finalize entry point; this is the type of the
* partm_finalize member of the PART component structure.
*
* Takes no arguments.
*
* @return int status. NOTE(review): presumably OMPI_SUCCESS or a
* failure status, as for the other entry points in this header --
* confirm.
*/
typedef int (*mca_part_base_component_finalize_fn_t)(void);
/**
 * PART component descriptor.
 *
 * Combines the base MCA component structure and component data with
 * the two PART-specific component entry points (init and finalize).
 * The member order is part of the interface and must not change.
 */
typedef struct mca_part_base_component_4_0_0_t {
    /** Base MCA component structure. */
    mca_base_component_t                  partm_version;
    /** Base MCA component data. */
    mca_base_component_data_t             partm_data;
    /** Component initialization entry point. */
    mca_part_base_component_init_fn_t     partm_init;
    /** Component finalization entry point. */
    mca_part_base_component_finalize_fn_t partm_finalize;
} mca_part_base_component_4_0_0_t;

/** Unversioned name for the PART component type. */
typedef mca_part_base_component_4_0_0_t mca_part_base_component_t;
/**
* MCA management functions.
*/
/**
* For non-threaded case, provides MCA the opportunity to
* progress outstanding requests on all btls.
* NOTE(review): "on all btls" may be wording inherited from another
* framework's header; the file-level description places PART on top
* of OSC -- confirm the intended wording.
*
* @return Count of "completions", a metric of
* how many items were completed in the call
* to progress.
*/
typedef int (*mca_part_base_module_progress_fn_t)(void);
/**
* MPI Interface Functions
*/
/**
* Initialize a partitioned receive request.
*
* @param buf (IN) User buffer.
* @param parts (IN) Number of partitions.
* @param count (IN) Number of elements of the specified datatype.
* NOTE(review): MPI-4 defines count as elements per
* partition -- confirm this layer uses the same meaning.
* @param datatype (IN) User defined datatype.
* @param src (IN) Source rank w/in communicator.
* @param tag (IN) User defined tag.
* @param comm (IN) Communicator.
* @param info (IN) Info object.
* @param request (OUT) Request handle.
* @return OMPI_SUCCESS or failure status.
*/
typedef int (*mca_part_base_module_precv_init_fn_t)(
void *buf,
size_t parts,
size_t count,
struct ompi_datatype_t *datatype,
int src,
int tag,
struct ompi_communicator_t* comm,
struct ompi_info_t * info,
struct ompi_request_t **request
);
/**
* Initialize a partitioned send request.
*
* Note: this signature has no send-mode argument.
*
* @param buf (IN) User buffer.
* @param parts (IN) Number of partitions.
* @param count (IN) Number of elements of the specified datatype.
* NOTE(review): MPI-4 defines count as elements per
* partition -- confirm this layer uses the same meaning.
* @param datatype (IN) User defined datatype.
* @param dst (IN) Peer rank w/in communicator.
* @param tag (IN) User defined tag.
* @param comm (IN) Communicator.
* @param info (IN) Info object.
* @param request (OUT) Request handle.
* @return OMPI_SUCCESS or failure status.
*/
typedef int (*mca_part_base_module_psend_init_fn_t)(
const void *buf,
size_t parts,
size_t count,
struct ompi_datatype_t *datatype,
int dst,
int tag,
struct ompi_communicator_t* comm,
struct ompi_info_t * info,
struct ompi_request_t **request
);
/**
* Initiate one or more partitioned requests.
*
* This type is an alias of ompi_request_start_fn_t, so its exact
* signature is whatever that type declares (it is not declared in
* this header; presumably it comes from ompi/request/request.h --
* confirm).
*
* @param count (IN) Number of requests
* @param requests (IN/OUT) Array of persistent requests
* @return OMPI_SUCCESS or failure status.
*/
typedef ompi_request_start_fn_t mca_part_base_module_start_fn_t;
/**
* Mark a range of partitions ready in a partitioned send request.
*
* @param min_part (IN) Minimum partition to mark ready for transfer.
* @param max_part (IN) Maximum partition to mark ready for transfer.
* NOTE(review): this header does not say whether
* max_part is inclusive; MPI_Pready_range treats
* both bounds as inclusive -- confirm.
* @param request (IN/OUT) Request
* @return OMPI_SUCCESS or failure status.
*
*/
typedef int (*mca_part_base_module_pready_fn_t)(
size_t min_part,
size_t max_part,
struct ompi_request_t* request
);
/**
* Check a range of partitions in a partitioned receive request.
*
* @param min_part (IN) Minimum partition to check.
* @param max_part (IN) Maximum partition to check.
* NOTE(review): this header does not say whether
* max_part is inclusive -- confirm.
* @param flag Flag for completion of entire range. Passed by
* pointer; presumably written by the callee (OUT) --
* confirm.
* @param request (IN/OUT) Request
* @return OMPI_SUCCESS or failure status.
*
*/
typedef int (*mca_part_base_module_parrived_fn_t)(
size_t min_part,
size_t max_part,
int* flag,
struct ompi_request_t* request
);
/**
 * PART instance.
 *
 * Table of function pointers through which the MCA framework and the
 * MPI layer call down into a PART module. The member order is part of
 * the interface and must not change.
 */
typedef struct mca_part_base_module_1_0_1_t {
    /* ---- downcalls from MCA to PART ---- */

    /** Progress entry point. */
    mca_part_base_module_progress_fn_t   part_progress;

    /* ---- downcalls from MPI to PART ---- */

    /** Partitioned receive request initialization. */
    mca_part_base_module_precv_init_fn_t part_precv_init;
    /** Partitioned send request initialization. */
    mca_part_base_module_psend_init_fn_t part_psend_init;
    /** Start one or more partitioned requests. */
    mca_part_base_module_start_fn_t      part_start;
    /** Mark a range of send partitions ready. */
    mca_part_base_module_pready_fn_t     part_pready;
    /** Check a range of receive partitions. */
    mca_part_base_module_parrived_fn_t   part_parrived;

    /*
     * Placeholder sections with no members at present:
     * diagnostics, FT Event, maximum constant sizes.
     */
} mca_part_base_module_1_0_1_t;

/** Unversioned name for the PART module type. */
typedef mca_part_base_module_1_0_1_t mca_part_base_module_t;
/*
* Macro for use in components that are of type part.
*
* Expands to OMPI_MCA_BASE_VERSION_2_1_0 with the framework name "part"
* and the version triple 4, 0, 0.
* NOTE(review): the macro name carries a 2_0_0 suffix while the version
* passed is 4, 0, 0 -- confirm the mismatch is intentional before
* renaming, since components refer to this macro by name.
*/
#define MCA_PART_BASE_VERSION_2_0_0 \
OMPI_MCA_BASE_VERSION_2_1_0("part", 4, 0, 0)
/*
* Declaration of the global PART module object. This header only
* declares it (extern); the definition is not in this file.
*/
OMPI_DECLSPEC extern mca_part_base_module_t mca_part;
END_C_DECLS
#endif /* MCA_PART_H */
|