/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2020 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2020 Bull S.A.S. All rights reserved.
* Copyright (c) 2022 IBM Corporation. All rights reserved
*
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#ifndef MCA_COLL_HAN_DYNAMIC_H
#define MCA_COLL_HAN_DYNAMIC_H
#include "ompi_config.h"
#include "mpi.h"
#include "ompi/mca/mca.h"
#include "opal/util/output.h"
#include "ompi/mca/coll/base/coll_base_functions.h"
#include "ompi/mca/coll/han/coll_han.h"
/*
* @file
* #################################################
* # Dynamic rules global architecture description #
* #################################################
*
* Han dynamic rules allow the user to define the collective
* module to call depending on the topological configuration of the
* sub-communicators and the collective parameters. This mechanism
* can also be used to fall back to the main collective on another module.
* The interface is described in coll_han_dynamic_file.h.
*
* #############################
* # Collective module storage #
* #############################
* To be able to switch between multiple collective modules, han
* directly accesses the module on the communicator. This information is
* stored in the collective structure of the communicator during the collective
* module choice at the communicator initialization. When han needs this
* information for the first time, it identifies the modules by their name and
* stores them in its module structure.
* Then, the modules are identified by their identifier.
*
* #########################
* # Dynamic rules storage #
* #########################
* There are two types of dynamic rules:
* - MCA parameter defined rules
* - File defined rules
*
* MCA parameter defined rules are stored in mca_coll_han_component.mca_sub_components.
* This is a doubly indexed table. The first index is the collective
* communication concerned and the second index is the topological level
* targeted by the rule. These parameters define the collective component to
* use for a specific collective communication on a specific topological level.
*
* File defined rules are stored in mca_coll_han_component.dynamic_rules.
* These structures are defined below. The rule storage is directly deduced
* from the rule file format.
*
* File defined rules take precedence over MCA parameter defined rules.
*
* #######################
* # Dynamic rules usage #
* #######################
* To choose which collective module to use on a specific configuration, han
* adds an indirection on the collective call: dynamic choice functions. These
* functions do not implement any collective. First, they try to find a dynamic
* rule from file for the given collective. If there is not any rule for the
* given configuration, MCA parameter defined rules are used. Once the module
* to use is found, the correct collective implementation is called.
*
* This indirection is also used on the global communicator. This allows han
* to provide a fallback mechanism considering the collective parameters.
*
* ##############################
* # Dynamic rules choice logic #
* ##############################
* Dynamic rules choice is made with a stack logic. Each new rule takes
* precedence over already defined rules. MCA parameter rules are the stack
* base. When a rule is needed, rules are read as a stack and the first
* matching rule encountered is chosen.
*
* Consequences:
* - If a collective identifier appears multiple times, only the last
* will be considered
* - If a topological level appears multiple times for a collective,
* only the last will be considered
* - If configuration rules or message size rules are not stored
* by increasing value, some of them will not be considered
*/
/* Dynamic rules support */
/*
 * Identifiers of the collective components that a han dynamic rule can name.
 * The identifiers are consecutive and start at 0. COMPONENTS_COUNT is not a
 * component: it is the number of identifiers and is used as the length of
 * the per-component tables declared in this header.
 */
typedef enum COMPONENTS {
    SELF   = 0,
    BASIC  = 1,
    LIBNBC = 2,
    TUNED  = 3,
    SM     = 4,
    ADAPT  = 5,
    HAN    = 6,
    /* Sentinel, left implicit so that it always follows the last identifier */
    COMPONENTS_COUNT
} COMPONENT_T;
/*
 * Description of one collective component known to han: its identifier,
 * its name and a pointer to the component itself.
 */
typedef struct {
/* Identifier of the component */
COMPONENT_T id;
/* Name of the component, as a C string */
char* component_name;
/* Component structure.
 * NOTE(review): whether this may be NULL (e.g. component not available)
 * is not visible from this header -- confirm against the code filling it. */
mca_coll_base_component_t* component;
} ompi_coll_han_components;
/*
 * Table of component descriptions, COMPONENTS_COUNT entries long.
 * Only declared here; the definition lives in another translation unit.
 * NOTE(review): presumably indexed by COMPONENT_T -- confirm at the definition.
 */
extern ompi_coll_han_components ompi_coll_han_available_components[COMPONENTS_COUNT];
/*
 * Topologic levels a dynamic rule can apply to.
 * Values are consecutive and start at 0.
 */
typedef enum TOPO_LVL {
    INTRA_NODE          = 0,
    INTER_NODE          = 1,
    /* The global communicator, handled as a topologic level of its own */
    GLOBAL_COMMUNICATOR = 2,
    /* Sentinel: number of topologic levels, not a level itself */
    NB_TOPO_LVL
} TOPO_LVL_T;
/* Rule for a specific msg size
 * in a specific configuration
 * for a specific collective
 * in a specific topologic level.
 *
 * The collective_id, topologic_level and configuration_size fields repeat
 * the fields of the same name found in configuration_rule_t. */
typedef struct msg_size_rule_s {
/* Collective this rule applies to */
COLLTYPE_T collective_id;
/* Topologic level this rule applies to */
TOPO_LVL_T topologic_level;
/* Configuration size this rule applies to */
int configuration_size;
/* Message size of the rule.
 * NOTE(review): the unit and whether this acts as a lower bound are not
 * visible from this header -- confirm against the rule lookup code. */
size_t msg_size;
/* Component to use on this specific configuration
* and message size */
COMPONENT_T component;
/* Algorithm identifier attached to the rule.
 * NOTE(review): presumably selects an algorithm inside `component`;
 * valid values are not visible here -- confirm. */
int algorithm_id;
} msg_size_rule_t;
/* Rule for a specific configuration
 * considering a specific collective
 * in a specific topologic level.
 * Holds the table of message size rules defined for that configuration. */
typedef struct configuration_rule_s {
/* Collective this rule applies to */
COLLTYPE_T collective_id;
/* Topologic level this rule applies to */
TOPO_LVL_T topologic_level;
/* Number of elements of the actual topologic level
* per element of the upper topologic level */
int configuration_size;
/* Number of message size rules for this configuration */
int nb_msg_size;
/* Table of message size rules for this configuration
 * (nb_msg_size entries) */
msg_size_rule_t *msg_size_rules;
} configuration_rule_t;
/* Set of dynamic rules for a specific collective
 * in a specific topologic level.
 * Holds the table of configuration rules defined for that pair. */
typedef struct topologic_rule_s {
/* Collective identifier */
COLLTYPE_T collective_id;
/* Topologic level of the rule */
TOPO_LVL_T topologic_level;
/* Number of rules.
 * NOTE(review): presumably the number of entries in configuration_rules,
 * by analogy with the other rule structures -- confirm. */
int nb_rules;
/* Table of configuration rules
* for this collective on this topologic level */
configuration_rule_t *configuration_rules;
} topologic_rule_t;
/* Set of dynamic rules for a collective.
 * Holds one topologic_rule_t per topologic level that has rules
 * for this collective. */
typedef struct collective_rule_s {
/* Collective identifier */
COLLTYPE_T collective_id;
/* Number of topologic level for this collective */
int nb_topologic_levels;
/* Table of topologic level rules
* for this collective (nb_topologic_levels entries) */
topologic_rule_t *topologic_rules;
} collective_rule_t;
/* Global dynamic rules structure.
 * Root of the rule storage: a table of per-collective rule sets, each of
 * which is further split by topologic level, configuration and message size. */
typedef struct mca_coll_han_dynamic_rule_s {
/* Number of collectives that have rules */
int nb_collectives;
/* Table of per-collective rules (nb_collectives entries) */
collective_rule_t *collective_rules;
} mca_coll_han_dynamic_rules_t;
/* Module storage: holds a pointer to one collective module. */
typedef struct collective_module_storage_s {
/* Module.
 * NOTE(review): ownership and whether it may be NULL are not visible
 * from this header -- confirm against the code that fills it. */
mca_coll_base_module_t *module_handler;
} collective_module_storage_t;
/* Table of module storage, with one slot per COMPONENT_T identifier. */
typedef struct mca_coll_han_collective_modules_storage_s {
/* COMPONENTS_COUNT module slots.
 * NOTE(review): presumably indexed by COMPONENT_T -- confirm at the users. */
collective_module_storage_t modules[COMPONENTS_COUNT];
} mca_coll_han_collective_modules_storage_t;
/*
 * Tests if a dynamic collective is implemented.
 *
 * coll_id: identifier of the collective to query.
 * NOTE(review): presumably returns true when han provides a dynamic choice
 * function for coll_id -- confirm against the definition.
 */
bool mca_coll_han_is_coll_dynamic_implemented(COLLTYPE_T coll_id);
/*
 * Converts a component name into its COMPONENT_T identifier.
 *
 * name: component name, as a C string.
 * NOTE(review): the value returned for an unknown or NULL name is not visible
 * from this header -- confirm against the definition before relying on it.
 */
COMPONENT_T mca_coll_han_component_name_to_id(const char* name);
/*
 * Converts a topologic level name into its identifier.
 *
 * topo_level_str: topologic level name, as a C string.
 * Returns an int rather than a TOPO_LVL_T.
 * NOTE(review): presumably so that a value outside TOPO_LVL_T can signal an
 * unknown name -- confirm against the definition.
 */
int mca_coll_han_topo_lvl_name_to_id(const char *topo_level_str);
#endif /* MCA_COLL_HAN_DYNAMIC_H */