File: allredtrace.c

package info (click to toggle)
mpich 4.0.2-3
  • links: PTS, VCS
  • area: main
  • in suites: bookworm
  • size: 423,384 kB
  • sloc: ansic: 1,088,434; cpp: 71,364; javascript: 40,763; f90: 22,829; sh: 17,463; perl: 14,773; xml: 14,418; python: 10,265; makefile: 9,246; fortran: 8,008; java: 4,355; asm: 324; ruby: 176; lisp: 19; php: 8; sed: 4
file content (155 lines) | stat: -rw-r--r-- 3,880 bytes parent folder | download | duplicates (4)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
/*
 * Copyright (C) by Argonne National Laboratory
 *     See COPYRIGHT in top-level directory
 */

/*
 * This code is intended to test the trace overhead when using an
 * MPI tracing package.  To perform the test, follow these steps:
 *
 * 1) Run with the verbose mode selected to determine the delay argument
 *    to use in subsequent tests:
 *      mpiexec -n 4096 allredtrace -v
 *    Assume that the computed delay count is 6237; that value is used in
 *    the following.
 *
 * 2) Run with an explicit delay count, without tracing enabled:
 *      mpiexec -n 4096 allredtrace -delaycount 6237
 *
 * 3) Build allredtrace with tracing enabled, then run:
 *      mpiexec -n 4096 allredtrace -delaycount 6237
 *
 * Compare the total times.  The tracing version should take slightly
 * longer but no more than, for example, 15%.
 */
#include "mpi.h"
#include <errno.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Set to 1 by the -v command-line option; enables progress output. */
static int verbose = 0;
/* Iteration count passed to Delay().  While it is 0, main() calls
 * SetupDelay() to choose a value. */
static int lCount = 0;
/* Busy-wait for the given number of loop iterations. */
void Delay(int);
/* Choose lCount for a target per-call delay given in microseconds. */
void SetupDelay(double);

/*
 * Times nLoop iterations of MPI_Allreduce, each followed by a call to
 * Delay(lCount), and prints the elapsed time on rank 0.
 *
 * Command-line options:
 *   -delaycount N   use N as the Delay() iteration count; N must be a
 *                   decimal integer that fits in an int (N == 0 leaves the
 *                   count unset, so it is still computed by SetupDelay())
 *   -v              enable verbose output
 *
 * An unrecognized argument, or a missing or malformed value for
 * -delaycount, is reported on stderr and aborts the MPI job.
 *
 * Returns 0 on success.
 */
int main(int argc, char *argv[])
{
    double usecPerCall = 100;
    double t, tsum;
    /* Send buffer for the allreduce.  Its value does not matter for the
     * timing, but it must be initialized so the reduction never reads an
     * indeterminate double. */
    double t1 = 0.0;
    int i, nLoop = 100;
    int rank;

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);

    /* Process arguments.  We allow the delay count to be set from the
     * command line to ensure reproducibility */
    for (i = 1; i < argc; i++) {
        if (strcmp(argv[i], "-delaycount") == 0) {
            char *end;
            long val;

            /* The option needs a value; without this check argv[i + 1]
             * would be the terminating NULL pointer. */
            if (i + 1 >= argc) {
                fprintf(stderr, "Missing value for -delaycount\n");
                MPI_Abort(MPI_COMM_WORLD, 1);
                return 1;
            }
            i++;

            /* strtol (unlike atoi) lets us reject non-numeric text,
             * trailing junk, and out-of-range values. */
            errno = 0;
            val = strtol(argv[i], &end, 10);
            if (end == argv[i] || *end != '\0' || errno == ERANGE ||
                val < INT_MIN || val > INT_MAX) {
                fprintf(stderr, "Invalid value for -delaycount: %s\n", argv[i]);
                MPI_Abort(MPI_COMM_WORLD, 1);
                return 1;
            }
            lCount = (int) val;
        } else if (strcmp(argv[i], "-v") == 0) {
            verbose = 1;
        } else {
            fprintf(stderr, "Unrecognized argument %s\n", argv[i]);
            /* MPI has been initialized, so terminate through MPI rather
             * than calling exit() without finalizing. */
            MPI_Abort(MPI_COMM_WORLD, 1);
            return 1;
        }
    }

    /* No explicit count given: compute one for the target delay. */
    if (lCount == 0) {
        SetupDelay(usecPerCall);
    }

    /* Line the processes up before starting the timed region. */
    MPI_Barrier(MPI_COMM_WORLD);

    t = MPI_Wtime();
    for (i = 0; i < nLoop; i++) {
        MPI_Allreduce(&t1, &tsum, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
        Delay(lCount);
    }
    t = MPI_Wtime() - t;
    MPI_Barrier(MPI_COMM_WORLD);
    if (rank == 0) {
        /* Reports the time measured locally on rank 0. */
        printf("For delay count %d, time is %e\n", lCount, t);
    }

    MPI_Barrier(MPI_COMM_WORLD);

    MPI_Finalize();

    return 0;
}

/*
 * Choose a value for the file-scope lCount such that one call to
 * Delay(lCount) takes roughly usec microseconds.
 *
 * The search starts at lCount = 128 and repeatedly times Delay(lCount):
 *   - more than twice the target: halve lCount;
 *   - less than half the target:  double lCount;
 *   - between half and the full target: scale lCount by target/measured;
 *   - otherwise (between the target and twice the target): accept.
 * The search also stops as soon as a halving follows a doubling or vice
 * versa, so it cannot oscillate.
 *
 * usec: target duration of one Delay() call, in microseconds.
 *
 * Writes lCount.  When verbose is set, progress is printed to stdout.
 */
void SetupDelay(double usec)
{
    double t, tick;
    double sec = 1.0e-6 * usec;
    int nLoop, i, direction;

    /* Compute the number of times to run the tests to get an accurate
     * number given the timer resolution. */
    nLoop = 1;
    tick = 100 * MPI_Wtick();
    do {
        nLoop = 2 * nLoop;
        t = MPI_Wtime();
        for (i = 0; i < nLoop; i++) {
            MPI_Wtime();
        }
        t = MPI_Wtime() - t;
    }
    while (t < tick && nLoop < 100000);

    if (verbose)
        printf("nLoop = %d\n", nLoop);

    /* Start with an estimated count */
    lCount = 128;
    direction = 0;
    while (1) {
        t = MPI_Wtime();
        for (i = 0; i < nLoop; i++) {
            Delay(lCount);
        }
        t = MPI_Wtime() - t;
        t = t / nLoop;
        if (verbose)
            printf("lCount = %d, time = %e\n", lCount, t);
        /* Each call is long compared with the timer resolution, so fewer
         * repetitions are needed.  Never go below one repetition: with
         * nLoop == 0 the timed loop would be empty and the division above
         * would be by zero, producing inf/NaN and a search that either
         * never terminates or stops on a meaningless measurement. */
        if (t > 10 * tick && nLoop > 1)
            nLoop = nLoop / 2;

        /* Compare measured delay */
        if (t > 2 * sec) {
            lCount = lCount / 2;
            if (direction == 1)
                break;
            direction = -1;
        } else if (t < sec / 2) {
            lCount = lCount * 2;
            if (direction == -1)
                break;
            direction = 1;
        } else if (t < sec) {
            /* sec/2 <= t < sec , so estimate the lCount to hit sec */
            lCount = (sec / t) * lCount;
        } else
            break;
    }

    if (verbose)
        printf("lCount = %d, t = %e\n", lCount, t);

    /* Should coordinate with the other processes - take the max? */
}

/* Accumulator written by Delay().  It is volatile so that every store in
 * the loop below counts as an observable access. */
volatile double delayCounter = 0;

/*
 * Busy-wait: reset delayCounter to zero, then add 2.73 to it count times.
 * A count of zero or less performs no additions.
 */
void Delay(int count)
{
    int remaining = count;

    delayCounter = 0.0;
    while (remaining > 0) {
        delayCounter += 2.73;
        remaining--;
    }
}