File: treematch.h

package info (click to toggle)
openmpi 5.0.8-4
  • links: PTS, VCS
  • area: main
  • in suites:
  • size: 201,684 kB
  • sloc: ansic: 613,078; makefile: 42,353; sh: 11,194; javascript: 9,244; f90: 7,052; java: 6,404; perl: 5,179; python: 1,859; lex: 740; fortran: 61; cpp: 20; tcl: 12
file content (211 lines) | stat: -rw-r--r-- 8,464 bytes parent folder | download | duplicates (5)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
#ifndef __TREEMATCH_H__
#define __TREEMATCH_H__

/* size_t definition */
#include <stddef.h>
#include "tm_verbose.h"
#include "ompi_config.h"

/********* TreeMatch Public Enum **********/

/*type of topology files that can be read*/
typedef enum{
  TM_FILE_TYPE_UNDEF,
  TM_FILE_TYPE_XML,
  TM_FILE_TYPE_TGT
} tm_file_type_t;

/* different metrics to evaluate the solution */
typedef enum{
  TM_METRIC_SUM_COM  = 1,
  TM_METRIC_MAX_COM  = 2,
  TM_METRIC_HOP_BYTE = 3
} tm_metric_t;

/* numbering */
typedef enum{
  TM_NUMBERING_LOGICAL   = 0,
  TM_NUMBERING_PHYSICAL  = 1
} tm_numbering_t;

/********* TreeMatch Public Structures **********/

typedef struct _job_info_t{
  int submit_date;
  int job_id;
  int finish_date;
} tm_job_info_t;

typedef struct _tm_tree_t{
  int constraint; /* tells if the tree has been constructed with constraints on the nodes or not.
		     Usefull for freeing it. needs to be set on the root only*/
  struct _tm_tree_t **child;
  struct _tm_tree_t *parent;
  struct _tm_tree_t *tab_child; /* The pointer to be freed */
  double val;
  int arity;
  int depth;
  int id;   /* id of the node or the leaf. Ids are different onmly on a given level */
  int uniq; /* uniq id in the whole tree */
  int dumb; /* 1 if the node belongs to a dumb tree: hence has to be freed separately */
  tm_job_info_t *job_info;
  int nb_processes; /* number of grouped processes (i.e. the order of the affinity matrix). Set at the root only */
}tm_tree_t; /* FT : changer le nom : tm_grouap_hierachy_t ? */

/* Maximum number of levels in the tree*/
#define TM_MAX_LEVELS 100

typedef struct {
  int *arity;             /* Arity of the nodes of each level*/
  int nb_levels;          /* Number of levels of the tree. Levels are numbered from top to bottom starting at 0*/
  size_t *nb_nodes;       /* Number of nodes of each level*/
  int physical_num;       /* Flag set to !=0 if se use physical numberig and set to 0 is we use logical numbering */
  int *node_id;           /* ID of the nodes of the tree of the last level*/
  int *node_rank ;        /* Rank of the nodes of the tree for the last level given its ID: this is the inverse tab of node_id*/

  size_t *nb_free_nodes;  /* Nb of available nodes of each level*/
  int **free_nodes;       /* array of node that are free: useful to simulate batch scheduler*/
  double *cost;           /* Cost of the communication depending on the distance:
			    cost[i] is the cost for communicating at distance nb_levels-i*/
  
  int *constraints;       /* Array of constraints: id of the nodes where it is possible to map processes */
  int nb_constraints;     /* Size of the above array */
  int oversub_fact;       /* Maximum number of processes to be mapped on a given node */
  int nb_proc_units;      /* The real number of units used for computation */
}tm_topology_t;


typedef struct {
  double ** mat;
  double *  sum_row;
  int order;
  long int nnz; /* number of non zero entries */
} tm_affinity_mat_t;

/*
 sigma[i] is such that  process i is mapped on core sigma[i]
 k[i][j] is such that core i executes process k[i][j] (0<=j<<=oversubscribing factor - 1)

 size of sigma is the number of processes (nb_objs)
 size of k is the number of cores/nodes   (nb_compute_units)
 size of k[i] is the number of process we can execute per nodes (1 if no oversubscribing)

 We must have number of process<=number of cores

 k[i] == NULL if no process is mapped on core i
*/

typedef struct {
  int *sigma;
  size_t sigma_length;
  int **k;
  size_t k_length;
  int oversub_fact;
}tm_solution_t;


/************ TreeMatch Public API ************/
/* construct topology from local one using hwloc */
OMPI_HIDDEN tm_topology_t* tm_get_local_topology_with_hwloc(void);

/* Aletrnatively, load XML or TGT topology */
OMPI_HIDDEN tm_topology_t *tm_load_topology(char *arch_filename, tm_file_type_t arch_file_type);
/*
   Alternatively, build a synthetic balanced topology.

   nb_levels : number of levels of the topology +1 (the last level must be of cost 0 and arity 0).
   arity : array of arity of the first nb_level (of size nb_levels)
   cost : array of costs between the levels (of size nb_levels)
   core_numbering: numbering of the core by the system. Array of size nb_core_per_node

   nb_core_per_nodes: number of cores of a given node. Size of the array core_numbering

   both arity and cost are copied inside tm_build_synthetic_topology

   The numbering of the cores is done in round robin fashion after a width traversal of the topology.
   for example:
       {0,1,2,3} becomes 0,1,2,3,4,5,6,7...
   and
       {0,2,1,3} becomes 0,2,1,3,4,6,5,7,...

   Example of call to build the 128.tgt file: tleaf 4 16 500 2 100 2 50 2 10

   double cost[5] = {500,100,50,10,0};
   int arity[5] = {16,2,2,2,0};
   int cn[2]={0,1};

   topology = tm_build_synthetic_topology(arity,cost,5,cn,2);

 */
OMPI_HIDDEN tm_topology_t  *tm_build_synthetic_topology(int *arity, double *cost, int nb_levels, int *core_numbering, int nb_core_per_nodes);
/* load affinity matrix */
OMPI_HIDDEN tm_affinity_mat_t *tm_load_aff_mat(char *com_filename);
/*
   Alternativelly, build the affinity matrix from a array of array of matrix of size order by order
   For performance reason mat is not copied.
*/
OMPI_HIDDEN tm_affinity_mat_t * tm_build_affinity_mat(double **mat, int order);
/* Add constraints to toplogy
   Return 1 on success and 0  if the constari,ts id are not compatible withe nodes id */
OMPI_HIDDEN int tm_topology_add_binding_constraints(char *bind_filename, tm_topology_t *topology);
/* Alternatively, set the constraints from an array.
   Return 1 on success and 0  if the constari,ts id are not compatible withe nodes id

   The array constraints is copied inside tm_topology_set_binding_constraints

*/
OMPI_HIDDEN int tm_topology_set_binding_constraints(int *constraints, int nb_constraints, tm_topology_t *topology);
/* display arity of the topology */
OMPI_HIDDEN void  tm_display_arity(tm_topology_t *topology);
/* display the full topology */
OMPI_HIDDEN void  tm_display_topology(tm_topology_t *topology);
/* Optimize the topology by decomposing arities */
OMPI_HIDDEN void tm_optimize_topology(tm_topology_t **topology);
/* Manage oversubscribing */
OMPI_HIDDEN void tm_enable_oversubscribing(tm_topology_t *topology, unsigned int oversub_fact);
/* core of the treematch: compute the solution tree */
OMPI_HIDDEN tm_tree_t *tm_build_tree_from_topology(tm_topology_t *topology, tm_affinity_mat_t *aff_mat, double *obj_weight, double *com_speed);
/* compute the mapping according to the tree and the core numbering*/
OMPI_HIDDEN tm_solution_t *tm_compute_mapping(tm_topology_t *topology, tm_tree_t *comm_tree);
/* display the solution*/
OMPI_HIDDEN double tm_display_solution(tm_topology_t *topology, tm_affinity_mat_t *aff_mat, tm_solution_t *sol, tm_metric_t metric);
/* display RR, packed, MPIPP*/
OMPI_HIDDEN void tm_display_other_heuristics(tm_topology_t *topology, tm_affinity_mat_t *aff_mat, tm_metric_t metric);
/* free TM strutures*/
OMPI_HIDDEN void tm_free_topology(tm_topology_t *topology);
OMPI_HIDDEN void tm_free_tree(tm_tree_t *comm_tree);
OMPI_HIDDEN void tm_free_solution(tm_solution_t *sol);
OMPI_HIDDEN void tm_free_affinity_mat(tm_affinity_mat_t *aff_mat);
/* manage verbosity of TM*/
void tm_set_verbose_level(unsigned int level);
unsigned int  tm_get_verbose_level(void);
/* finalize treematch :check memory if necessary, and free internal variables (thread pool)*/
OMPI_HIDDEN void tm_finalize(void);

/*
Ask for exhaustive search: may be very long
   new_val == 0 : no exhuative search
   new_val != 0 : exhuative search
*/
OMPI_HIDDEN void tm_set_exhaustive_search_flag(int new_val);
OMPI_HIDDEN int tm_get_exhaustive_search_flag(void);

/*
Ask for greedy k-partitionning even if scotch is available
   new_val == 0 : no greedy k-partitionning
   new_val != 0 : greedy k-partitionning
*/
OMPI_HIDDEN void tm_set_greedy_flag(int new_val);
OMPI_HIDDEN int tm_get_greedy_flag(void);


/* Setting the maximum number of threads you want to use in parallel parts of TreeMatch */
OMPI_HIDDEN void tm_set_max_nb_threads(unsigned int val);

/* managing the usage of physical vs. logical core numbering when using hwloc/xml files */
OMPI_HIDDEN void tm_set_numbering(tm_numbering_t new_val); /* TM_NUMBERING_LOGICAL or TM_NUMBERING_PHYSICAL */
OMPI_HIDDEN tm_numbering_t  tm_get_numbering(void); /* TM_NUMBERING_LOGICAL or TM_NUMBERING_PHYSICAL */

#include "tm_malloc.h"

#endif