File: allred_float.c

package info (click to toggle)
mpich 5.0.0-1
  • links: PTS, VCS
  • area: main
  • in suites: experimental
  • size: 251,828 kB
  • sloc: ansic: 1,323,147; cpp: 82,869; f90: 72,420; javascript: 40,763; perl: 28,296; sh: 19,399; python: 16,191; xml: 14,418; makefile: 9,468; fortran: 8,046; java: 4,635; pascal: 352; asm: 324; ruby: 176; awk: 27; lisp: 19; php: 8; sed: 4
file content (98 lines) | stat: -rw-r--r-- 2,981 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
/*
 * Copyright (C) by Argonne National Laboratory
 *     See COPYRIGHT in top-level directory
 */

#include "mpitest.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* MPI_Allreduce needs to produce identical results on all ranks. This is
 * particularly challenging for floating point datatypes since computer
 * floating point arithmetic does not follow the associative law. This means
 * certain algorithms that work for integers need to be excluded for
 * floating point.
 *
 * This test detects when an inappropriate algorithm is used for floating
 * point reduction.
 */

/* Single-precision float has roughly a precision of 7 decimal digits, so a
 * single TINY is too small to be represented next to BIG: the outcome of a
 * sum mixing the two depends on the order in which the terms are added. */
#define BIG 1e6
#define TINY 1e-2

/* Number of elements in the reduction buffer. */
#define N 8

/* Reduction buffer: filled by init_buf() and then used as the in-place
 * send/receive buffer of MPI_Allreduce in main(). */
float buf[N];

/* Fill every element of buf with a single value selected by this process's
 * rank: BIG if rank equals pos1, otherwise -BIG if rank equals pos2,
 * otherwise TINY.
 */
static void init_buf(int rank, int pos1, int pos2)
{
    /* Across the communicator this yields one (BIG, -BIG) pair mixed with
     * TINY contributions. Adding BIG and -BIG first leaves just the sum of
     * the TINYs, while other association orders give different results. A
     * valid reduction algorithm must still return the same value on every
     * rank.
     */
    const float fill = (rank == pos1) ? BIG : (rank == pos2) ? -BIG : TINY;

    for (int k = 0; k < N; k++) {
        buf[k] = fill;
    }
}

/* Test driver.
 *
 * For every pair of ranks (pos1, pos2) with pos1 < pos2, each process fills
 * buf via init_buf(), runs an in-place MPI_Allreduce with MPI_SUM over
 * MPI_FLOAT, and the per-rank results are gathered on rank 0. Rank 0 then
 * compares every rank's result byte-for-byte against its own and counts each
 * differing element as an error.
 *
 * Returns the value produced by MTestReturnValue(errs).
 */
int main(int argc, char **argv)
{
    int errs = 0;

    MTest_Init(&argc, &argv);

    int rank, size;
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);

    if (size < 3) {
        printf("At least 3 processes required. More (e.g. 10) is recommended.\n");
        MPI_Abort(MPI_COMM_WORLD, 1);
    }

    /* Only the gather root (rank 0) needs a receive buffer. It stays NULL on
     * the other ranks so that a well-defined pointer value is always passed
     * to MPI_Gather. The size is loop-invariant, so allocate once up front
     * instead of once per (pos1, pos2) pair. */
    float *check_buf = NULL;
    if (rank == 0) {
        check_buf = malloc((size_t) N * (size_t) size * sizeof *check_buf);
        if (check_buf == NULL) {
            printf("Failed to allocate the result check buffer.\n");
            MPI_Abort(MPI_COMM_WORLD, 1);
        }
    }

    for (int pos1 = 0; pos1 < size; pos1++) {
        for (int pos2 = pos1 + 1; pos2 < size; pos2++) {
            init_buf(rank, pos1, pos2);

            MPI_Allreduce(MPI_IN_PLACE, buf, N, MPI_FLOAT, MPI_SUM, MPI_COMM_WORLD);

            /* Collect the N reduced values of every rank on rank 0. */
            MPI_Gather(buf, N, MPI_FLOAT, check_buf, N, MPI_FLOAT, 0, MPI_COMM_WORLD);

            if (rank == 0) {
                MTestPrintfMsg(1, "BIG positions = (%d, %d), result = [", pos1, pos2);
                for (int j = 0; j < N; j++) {
                    MTestPrintfMsg(1, "%f ", buf[j]);
                }
                MTestPrintfMsg(1, "]\n");

                for (int i = 0; i < size; i++) {
                    for (int j = 0; j < N; j++) {
                        /* Compare raw bytes: the results must be
                         * bit-identical, not merely numerically close. */
                        if (memcmp(&check_buf[i * N + j], &buf[j], sizeof(float)) != 0) {
                            /* Report only the first 10 mismatches, but keep
                             * counting all of them. */
                            if (errs < 10) {
                                printf("(%d - %d) Result [%d] from rank %d mismatch: %f != %f\n",
                                       pos1, pos2, j, i, check_buf[i * N + j], buf[j]);
                            }
                            errs++;
                        }
                    }
                }
            }
        }
    }

    /* free(NULL) is a no-op, so this is safe on non-root ranks too. */
    free(check_buf);

    MTest_Finalize(errs);
    return MTestReturnValue(errs);
}