1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155
|
/*
* Copyright (C) by Argonne National Laboratory
* See COPYRIGHT in top-level directory
*/
/*
* This code is intended to test the trace overhead when using an
* MPI tracing package. To perform the test, follow these steps:
*
* 1) Run with the verbose mode selected to determine the delay argument
* to use in subsequent tests:
* mpiexec -n 4096 allredtrace -v
* Assume that the computed delay count is 6237; that value is used in
* the following.
*
* 2) Run with an explicit delay count, without tracing enabled:
* mpiexec -n 4096 allredtrace -delaycount 6237
*
* 3) Build allredtrace with tracing enabled, then run:
* mpiexec -n 4096 allredtrace -delaycount 6237
*
* Compare the total times. The tracing version should take slightly
* longer but no more than, for example, 15%.
*/
#include "mpi.h"
#include <errno.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/* Set to 1 by the -v command-line option; enables the progress output
 * printed by SetupDelay. */
static int verbose = 0;
/* Iteration count handed to Delay().  Taken from -delaycount when given;
 * otherwise (while still 0) it is computed by SetupDelay. */
static int lCount = 0;
/* Busy-loop for the given number of iterations. */
void Delay(int);
/* Search for an lCount whose Delay() call takes approximately the given
 * number of microseconds, and store it in lCount. */
void SetupDelay(double);
/*
 * Parse a delay count given on the command line.
 *
 * text - the argument string to convert
 * out  - receives the parsed value on success; untouched on failure
 *
 * Returns 0 when text is a complete decimal integer in [0, INT_MAX],
 * and -1 otherwise (empty string, trailing junk, negative, or overflow).
 */
static int ParseDelayCount(const char *text, int *out)
{
    char *end = NULL;
    long value;

    errno = 0;
    value = strtol(text, &end, 10);
    if (end == text || *end != '\0' || errno == ERANGE) {
        return -1;
    }
    if (value < 0 || value > INT_MAX) {
        return -1;
    }
    *out = (int) value;
    return 0;
}

/*
 * Time nLoop iterations of MPI_Allreduce followed by a fixed busy-wait,
 * and have rank 0 report the elapsed time.
 *
 * Options:
 *   -delaycount N   use N as the busy-wait iteration count (0 means
 *                   "calibrate automatically", same as omitting it)
 *   -v              verbose output during calibration
 *
 * Exits with status 1 on an unrecognized or malformed argument.
 */
int main(int argc, char *argv[])
{
    double usecPerCall = 100;
    /* t1 is the Allreduce send buffer, so it must hold a defined value */
    double t, t1 = 0.0, tsum = 0.0;
    int i, nLoop = 100;
    int rank;

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);

    /* Process arguments.  We allow the delay count to be set from the
     * command line to ensure reproducibility */
    for (i = 1; i < argc; i++) {
        if (strcmp(argv[i], "-delaycount") == 0) {
            /* The option needs a value; without this check argv[argc]
             * (a null pointer) would be handed to the parser. */
            if (i + 1 >= argc) {
                fprintf(stderr, "Missing value for -delaycount\n");
                exit(1);
            }
            i++;
            if (ParseDelayCount(argv[i], &lCount) != 0) {
                fprintf(stderr, "Invalid delay count %s\n", argv[i]);
                exit(1);
            }
        } else if (strcmp(argv[i], "-v") == 0) {
            verbose = 1;
        } else {
            fprintf(stderr, "Unrecognized argument %s\n", argv[i]);
            exit(1);
        }
    }

    /* No explicit count: calibrate one for the target delay */
    if (lCount == 0) {
        SetupDelay(usecPerCall);
    }

    /* Start all ranks together before timing */
    MPI_Barrier(MPI_COMM_WORLD);

    t = MPI_Wtime();
    for (i = 0; i < nLoop; i++) {
        MPI_Allreduce(&t1, &tsum, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
        Delay(lCount);
    }
    t = MPI_Wtime() - t;

    MPI_Barrier(MPI_COMM_WORLD);
    if (rank == 0) {
        printf("For delay count %d, time is %e\n", lCount, t);
    }
    MPI_Barrier(MPI_COMM_WORLD);

    MPI_Finalize();
    return 0;
}
/*
 * Calibrate the global lCount so that a single Delay(lCount) call takes
 * approximately usec microseconds on this process.
 *
 * usec - target duration of one Delay() call, in microseconds
 *
 * The result is stored in lCount.  Progress is printed when verbose is
 * set.  Each process calibrates independently.
 */
void SetupDelay(double usec)
{
    double t, tick;
    double sec = 1.0e-6 * usec;
    int nLoop, i, direction;

    /* Compute the number of times to run the tests to get an accurate
     * number given the timer resolution. */
    nLoop = 1;
    tick = 100 * MPI_Wtick();
    do {
        nLoop = 2 * nLoop;
        t = MPI_Wtime();
        for (i = 0; i < nLoop; i++) {
            MPI_Wtime();
        }
        t = MPI_Wtime() - t;
    } while (t < tick && nLoop < 100000);
    if (verbose)
        printf("nLoop = %d\n", nLoop);

    /* Start with an estimated count */
    lCount = 128;
    /* direction records the last coarse step: -1 halved, +1 doubled,
     * 0 none yet.  A reversal ends the search. */
    direction = 0;
    while (1) {
        t = MPI_Wtime();
        for (i = 0; i < nLoop; i++) {
            Delay(lCount);
        }
        t = MPI_Wtime() - t;
        t = t / nLoop;
        if (verbose)
            printf("lCount = %d, time = %e\n", lCount, t);

        /* Each call is already long compared with the timer resolution,
         * so fewer repetitions suffice.  Never go below one repetition:
         * with nLoop == 0 nothing would be timed and the division above
         * would be by zero. */
        if (t > 10 * tick && nLoop > 1)
            nLoop = nLoop / 2;

        /* Compare measured delay */
        if (t > 2 * sec) {
            lCount = lCount / 2;
            if (direction == 1)
                break;
            direction = -1;
        } else if (t < sec / 2) {
            lCount = lCount * 2;
            if (direction == -1)
                break;
            direction = 1;
        } else if (t < sec) {
            /* sec/2 <= t < sec , so estimate the lCount to hit sec */
            int refined = (int) ((sec / t) * lCount);

            /* Truncation left the count unchanged: another pass would
             * only re-measure the same value, so stop here. */
            if (refined == lCount)
                break;
            lCount = refined;
        } else
            break;
    }
    if (verbose)
        printf("lCount = %d, t = %e\n", lCount, t);
    /* Should coordinate with the other processes - take the max? */
}
/* Accumulator for the busy-wait; volatile so every update is a real
 * memory access that the compiler cannot remove. */
volatile double delayCounter = 0;

/*
 * Busy-wait by performing count floating-point additions on
 * delayCounter.  The accumulator is reset to zero first; a count of
 * zero or less performs no additions.
 */
void Delay(int count)
{
    int remaining = count;

    delayCounter = 0.0;
    while (remaining > 0) {
        delayCounter = delayCounter + 2.73;
        remaining--;
    }
}
|