/*
* Copyright (C) by Argonne National Laboratory
* See COPYRIGHT in top-level directory
 */

#include "mpi.h"
#include <stdio.h>
#include <stdlib.h>
#include "mpitest.h"
#include <math.h>        /* for fabs(3) */

/* Measure and compare the relative performance of MPI_Group_translate_ranks
* with small and large group2 sizes but a constant number of ranks. This
* serves as a performance sanity check for the Scalasca use case where we
* translate to MPI_COMM_WORLD ranks. The performance should only depend on the
* number of ranks passed, not the size of either group (especially group2).
*
* This test is probably only meaningful for large-ish process counts, so we may
* not be able to run this test by default in the nightlies. */
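
/* For reference, the Scalasca-style pattern this test models looks roughly
 * like the following sketch (subcomm/subranks/worldranks are illustrative
 * names, not part of this test):
 *
 *     MPI_Group gsub, gworld;
 *     MPI_Comm_group(subcomm, &gsub);
 *     MPI_Comm_group(MPI_COMM_WORLD, &gworld);
 *     MPI_Group_translate_ranks(gsub, n, subranks, gworld, worldranks);
 *
 * The cost of the translate call should scale with n, the number of ranks
 * being translated, not with the size of gworld.
 */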
/* number of iterations used for timing */
#define NUM_LOOPS (1000000)

int main(int argc, char *argv[])
{
int errs = 0;
int *ranks;
int *ranksout;
MPI_Group gworld, grev, gself;
MPI_Comm comm;
MPI_Comm commrev;
int rank, size, i;
double start, end, time1, time2;
MTest_Init(&argc, &argv);
comm = MPI_COMM_WORLD;
MPI_Comm_size(comm, &size);
MPI_Comm_rank(comm, &rank);
ranks = malloc(size * sizeof(int));
ranksout = malloc(size * sizeof(int));
if (!ranks || !ranksout) {
fprintf(stderr, "out of memory\n");
MPI_Abort(MPI_COMM_WORLD, 1);
}
/* generate a comm with the rank order reversed */
MPI_Comm_split(comm, 0, (size - rank - 1), &commrev);
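    /* MPI_Comm_split orders ranks by key, so this process (world rank
     * "rank") has rank (size - rank - 1) in commrev; grev therefore lists
     * the world ranks in reverse order */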
MPI_Comm_group(commrev, &grev);
MPI_Comm_group(MPI_COMM_SELF, &gself);
MPI_Comm_group(comm, &gworld);
/* sanity check correctness first */
for (i = 0; i < size; i++) {
ranks[i] = i;
ranksout[i] = -1;
}
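    /* every rank of the reversed group should translate to its mirror
     * position in the world group: grev rank i is world rank (size - i - 1) */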
MPI_Group_translate_ranks(grev, size, ranks, gworld, ranksout);
for (i = 0; i < size; i++) {
if (ranksout[i] != (size - i - 1)) {
if (rank == 0)
printf("%d: (gworld) expected ranksout[%d]=%d, got %d\n", rank, i,
(size - rank - 1), ranksout[i]);
++errs;
}
}
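    /* when translating into MPI_COMM_SELF's group, only this process's own
     * entry resolves (to rank 0); every other entry must come back as
     * MPI_UNDEFINED */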
MPI_Group_translate_ranks(grev, size, ranks, gself, ranksout);
for (i = 0; i < size; i++) {
int expected = (i == (size - rank - 1) ? 0 : MPI_UNDEFINED);
if (ranksout[i] != expected) {
if (rank == 0)
printf("%d: (gself) expected ranksout[%d]=%d, got %d\n", rank, i, expected,
ranksout[i]);
++errs;
}
    }

/* now compare relative performance */
/* we need lots of procs to get a group large enough to have meaningful
* numbers. On most testing machines this means that we're oversubscribing
* cores in a big way, which might perturb the timing results. So we make
* sure everyone started up and then everyone but rank 0 goes to sleep to
* let rank 0 do all the timings. */
MPI_Barrier(comm);
if (rank != 0) {
MTestSleep(10);
} else { /* rank==0 */
MTestSleep(1); /* try to avoid timing while everyone else is making syscalls */
        MPI_Group_translate_ranks(grev, size, ranks, gworld, ranksout); /* throwaway iteration */
start = MPI_Wtime();
for (i = 0; i < NUM_LOOPS; ++i) {
MPI_Group_translate_ranks(grev, size, ranks, gworld, ranksout);
}
end = MPI_Wtime();
time1 = end - start;
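
        /* repeat the same measurement, this time translating into the
         * single-member MPI_COMM_SELF group */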
        MPI_Group_translate_ranks(grev, size, ranks, gself, ranksout); /* throwaway iteration */
start = MPI_Wtime();
for (i = 0; i < NUM_LOOPS; ++i) {
MPI_Group_translate_ranks(grev, size, ranks, gself, ranksout);
}
end = MPI_Wtime();
time2 = end - start;
        /* complain if the two timings differ by more than 2x the "gself"
         * time (for time1 > time2 this means time1 > 3*time2) */
if (fabs(time1 - time2) > (2.00 * time2)) {
printf("too much difference in MPI_Group_translate_ranks performance:\n");
printf("time1=%f time2=%f\n", time1, time2);
printf("(fabs(time1-time2)/time2)=%f\n", (fabs(time1 - time2) / time2));
if (time1 < time2) {
printf("also, (time1<time2) is surprising...\n");
}
++errs;
}
    }

free(ranks);
free(ranksout);
MPI_Group_free(&grev);
MPI_Group_free(&gself);
MPI_Group_free(&gworld);
MPI_Comm_free(&commrev);
MTest_Finalize(errs);
return MTestReturnValue(errs);
}