1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342
|
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2017 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2008-2013 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2012-2013 Inria. All rights reserved.
* Copyright (c) 2018 Triad National Security, LLC. All rights
* reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "ompi_config.h"
#include <string.h>
#include "opal/class/opal_list.h"
#include "opal/util/argv.h"
#include "opal/util/output.h"
#include "opal/util/show_help.h"
#include "ompi/mca/mca.h"
#include "opal/mca/base/base.h"
#include "ompi/mca/topo/topo.h"
#include "ompi/mca/topo/base/base.h"
#include "ompi/communicator/communicator.h"
/*
 * Local functions
 *
 * fill_null_pointers() is defined later in this file. It is declared
 * here because the selection code calls it before that definition.
 */
static void fill_null_pointers(int type, mca_topo_base_module_t *module);
/*
 * Bookkeeping entry for one component whose query returned a module.
 * Entries are kept on an opal_list_t while a selection is in progress
 * so that the modules which were obtained but not chosen can be
 * released once the winner is known.
 */
typedef struct queried_module_t {
    /* list linkage; kept as the first member so the entry can be cast
       to and from opal_list_item_t */
    opal_list_item_t super;
    /* component that was queried */
    mca_topo_base_component_t *om_component;
    /* module returned by that component's query */
    mca_topo_base_module_t *om_module;
} queried_module_t;

/* Class instance for queried_module_t; both hook arguments are NULL. */
static OBJ_CLASS_INSTANCE(queried_module_t, opal_list_item_t, NULL, NULL);
/*
* Only one topo module can be attached to each communicator. The
* communicator provided as argument is the old communicator, and the
* newly selected topology is __not__ supposed to be attached to it.
*
* This module calls the query function on all the components that were
* detected by topo_base_open. This function is called on a
* per-communicator basis. This function has the following function.
*
* 1. Iterate over the list of available_components.
* 2. Call the query function on each of these components.
* 3. The query function returns a module and its priority.
* 4. Select the module with the highest priority.
* 5. OBJ_RELEASE all the "losing" modules.
*/
static int _mca_topo_base_select (const ompi_communicator_t *comm, const ompi_group_t *group,
mca_topo_base_module_t *preferred_module,
mca_topo_base_module_t **selected_module,
uint32_t type)
{
int priority;
int best_priority;
opal_list_item_t *item;
mca_base_component_list_item_t *cli;
mca_topo_base_component_t *component;
mca_topo_base_component_t *best_component;
mca_topo_base_module_t *module;
opal_list_t queried;
queried_module_t *om;
int err = MPI_SUCCESS;
/* Ensure the topo framework is initialized */
if (OMPI_SUCCESS != (err = mca_topo_base_lazy_init())) {
return err;
}
if (comm) {
opal_output_verbose(10, ompi_topo_base_framework.framework_output,
"topo:base:comm_select: new communicator: %s (cid %s)",
comm->c_name, ompi_comm_print_cid (comm));
} else {
opal_output_verbose(10, ompi_topo_base_framework.framework_output,
"topo:base:group_select: new communicator");
}
/* Check and see if a preferred component was provided. If it was
provided then it should be used (if possible) */
if (NULL != preferred_module) {
/* We have a preferred module. Check if it is available
and if so, whether it wants to run */
opal_output_verbose(10, ompi_topo_base_framework.framework_output,
"topo:base:comm_select: Checking preferred component: %s",
preferred_module->topo_component->topoc_version.mca_component_name);
/* query the component for its priority and get its module
structure. This is necessary to proceed */
component = (mca_topo_base_component_t *)preferred_module->topo_component;
module = component->topoc_query(comm, group, &priority, type);
if (NULL != module) {
/* this query seems to have returned something legitimate
* and we can now go ahead fill the NULL functions with the
* corresponding base version and then return.
*/
fill_null_pointers(type, module);
*selected_module = module;
module->topo_component = component;
return OMPI_SUCCESS;
}
/* If we get here, the preferred component is present, but is
unable to run. This is not a good sign. We should try
selecting some other component. We let it fall through
and select from the list of available components */
}
/*
* We fall here if one of the two things happened:
* 1. The preferred component was provided but for some reason was
* not able to be selected
* 2. No preferred component was provided
*
* All we need to do is to go through the list of available
* components and find the one which has the highest priority and
* use that for this communicator
*/
best_component = NULL;
best_priority = -1;
OBJ_CONSTRUCT(&queried, opal_list_t);
OPAL_LIST_FOREACH(cli, &ompi_topo_base_framework.framework_components, mca_base_component_list_item_t) {
component = (mca_topo_base_component_t *) cli->cli_component;
opal_output_verbose(10, ompi_topo_base_framework.framework_output,
"select: initialising %s component %s",
component->topoc_version.mca_type_name,
component->topoc_version.mca_component_name);
/*
* we can call the query function only if there is a function :-)
*/
if (NULL == component->topoc_query) {
opal_output_verbose(10, ompi_topo_base_framework.framework_output,
"select: no query, ignoring the component");
} else {
/*
* call the query function and see what it returns
*/
module = component->topoc_query(comm, group, &priority, type);
if (NULL == module) {
/*
* query did not return any action which can be used
*/
opal_output_verbose(10, ompi_topo_base_framework.framework_output,
"select: query returned failure");
} else {
opal_output_verbose(10, ompi_topo_base_framework.framework_output,
"select: query returned priority %d",
priority);
/*
* is this the best component we have found till now?
*/
if (priority > best_priority) {
best_priority = priority;
best_component = component;
}
om = OBJ_NEW(queried_module_t);
/*
* check if we have run out of space
*/
if (NULL == om) {
OBJ_DESTRUCT(&queried);
return OMPI_ERR_OUT_OF_RESOURCE;
}
om->om_component = component;
om->om_module = module;
opal_list_append(&queried, (opal_list_item_t *)om);
} /* end else of if (NULL == module) */
} /* end else of if (NULL == component->init) */
} /* end for ... end of traversal */
/*
* Now we have alist of components which successfully returned
* their module struct. One of these components has the best
* priority. The rest have to be comm_unqueried to counter the
* effects of comm_query'ing them. Finalize happens only on
* components which should are initialized.
*/
if (NULL == best_component) {
return OMPI_ERR_NOT_FOUND;
}
/*
* We now have a list of components which have successfully
* returned their priorities from the query. We now have to
* unquery() those components which have not been selected and
* init() the component which was selected
*/
for (item = opal_list_remove_first(&queried);
NULL != item;
item = opal_list_remove_first(&queried)) {
om = (queried_module_t *) item;
if (om->om_component == best_component) {
/*
* this is the chosen component, we have to initialise the
* module of this component.
*
* ANJU: a component might not have all the functions
* defined. Wherever a function pointer is null in the
* module structure we need to fill it in with the base
* structure function pointers. This is yet to be done
*/
fill_null_pointers(type, om->om_module);
om->om_module->topo_component = best_component;
*selected_module = om->om_module;
} else {
/* this is not the "chosen one", finalize */
opal_output_verbose(10, ompi_topo_base_framework.framework_output,
"select: component %s is not selected",
om->om_component->topoc_version.mca_component_name);
OBJ_RELEASE(om->om_module);
}
OBJ_RELEASE(om);
} /* traversing through the entire list */
opal_output_verbose(10, ompi_topo_base_framework.framework_output,
"select: component %s selected",
best_component->topoc_version.mca_component_name);
#if OPAL_ENABLE_FT_MPI
if(ompi_ftmpi_enabled) {
/* check if module is tested for FT, warn if not. */
const char* ft_whitelist="";
opal_show_help("help-mpi-ft.txt", "module:untested:failundef", true,
best_component->topoc_version.mca_type_name,
best_component->topoc_version.mca_component_name,
ft_whitelist);
}
#endif /* OPAL_ENABLE_FT_MPI */
return OMPI_SUCCESS;
}
/*
 * Select a topo module on behalf of a communicator.
 *
 * Forwards to _mca_topo_base_select() with no group, and returns
 * whatever that function returns.
 */
int mca_topo_base_comm_select (const ompi_communicator_t *comm, mca_topo_base_module_t *preferred_module,
                               mca_topo_base_module_t **selected_module, uint32_t type)
{
    /* a communicator-based selection has no group to pass along */
    const ompi_group_t *no_group = NULL;

    return _mca_topo_base_select (comm, no_group, preferred_module, selected_module, type);
}
/*
 * Select a topo module on behalf of a group.
 *
 * Forwards to _mca_topo_base_select() with no communicator, and
 * returns whatever that function returns.
 */
int mca_topo_base_group_select(const ompi_group_t *group, mca_topo_base_module_t *preferred_module,
                               mca_topo_base_module_t **selected_module, uint32_t type)
{
    /* a group-based selection has no communicator to pass along */
    const ompi_communicator_t *no_comm = NULL;

    return _mca_topo_base_select (no_comm, group, preferred_module, selected_module, type);
}
/*
 * Give a module the base implementation for every topology function it
 * did not supply itself. Only the function table matching the given
 * type (cartesian, graph or distributed graph) is touched; pointers
 * that are already set are left alone, and any other type leaves the
 * module unchanged.
 *
 * TODO: somewhere, a check should be incorporated that the module
 * implements the common minimum set of functions.
 */
static void fill_null_pointers(int type, mca_topo_base_module_t *module)
{
/* Install the base fallback only when the module left the slot empty. */
#define TOPO_DEFAULT_IF_UNSET(slot, fallback)   \
    do {                                        \
        if (NULL == (slot)) {                   \
            (slot) = (fallback);                \
        }                                       \
    } while (0)

    if (OMPI_COMM_CART == type) {
        TOPO_DEFAULT_IF_UNSET(module->topo.cart.cart_coords, mca_topo_base_cart_coords);
        TOPO_DEFAULT_IF_UNSET(module->topo.cart.cart_create, mca_topo_base_cart_create);
        TOPO_DEFAULT_IF_UNSET(module->topo.cart.cart_get, mca_topo_base_cart_get);
        TOPO_DEFAULT_IF_UNSET(module->topo.cart.cartdim_get, mca_topo_base_cartdim_get);
        TOPO_DEFAULT_IF_UNSET(module->topo.cart.cart_map, mca_topo_base_cart_map);
        TOPO_DEFAULT_IF_UNSET(module->topo.cart.cart_rank, mca_topo_base_cart_rank);
        TOPO_DEFAULT_IF_UNSET(module->topo.cart.cart_shift, mca_topo_base_cart_shift);
        TOPO_DEFAULT_IF_UNSET(module->topo.cart.cart_sub, mca_topo_base_cart_sub);
        return;
    }

    if (OMPI_COMM_GRAPH == type) {
        TOPO_DEFAULT_IF_UNSET(module->topo.graph.graph_create, mca_topo_base_graph_create);
        TOPO_DEFAULT_IF_UNSET(module->topo.graph.graph_get, mca_topo_base_graph_get);
        TOPO_DEFAULT_IF_UNSET(module->topo.graph.graph_map, mca_topo_base_graph_map);
        TOPO_DEFAULT_IF_UNSET(module->topo.graph.graphdims_get, mca_topo_base_graphdims_get);
        TOPO_DEFAULT_IF_UNSET(module->topo.graph.graph_neighbors, mca_topo_base_graph_neighbors);
        TOPO_DEFAULT_IF_UNSET(module->topo.graph.graph_neighbors_count,
                              mca_topo_base_graph_neighbors_count);
        return;
    }

    if (OMPI_COMM_DIST_GRAPH == type) {
        TOPO_DEFAULT_IF_UNSET(module->topo.dist_graph.dist_graph_create,
                              mca_topo_base_dist_graph_create);
        TOPO_DEFAULT_IF_UNSET(module->topo.dist_graph.dist_graph_create_adjacent,
                              mca_topo_base_dist_graph_create_adjacent);
        TOPO_DEFAULT_IF_UNSET(module->topo.dist_graph.dist_graph_neighbors,
                              mca_topo_base_dist_graph_neighbors);
        TOPO_DEFAULT_IF_UNSET(module->topo.dist_graph.dist_graph_neighbors_count,
                              mca_topo_base_dist_graph_neighbors_count);
    }

#undef TOPO_DEFAULT_IF_UNSET
}
|