File: failure_ack.c

package info (click to toggle)
mpich 4.0.2-3
  • links: PTS, VCS
  • area: main
  • in suites: bookworm
  • size: 423,384 kB
  • sloc: ansic: 1,088,434; cpp: 71,364; javascript: 40,763; f90: 22,829; sh: 17,463; perl: 14,773; xml: 14,418; python: 10,265; makefile: 9,246; fortran: 8,008; java: 4,355; asm: 324; ruby: 176; lisp: 19; php: 8; sed: 4
file content (116 lines) | stat: -rw-r--r-- 4,114 bytes parent folder | download | duplicates (4)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
/*
 * Copyright (C) by Argonne National Laboratory
 *     See COPYRIGHT in top-level directory
 */

#include <mpi.h>
#include <stdio.h>
#include <stdlib.h>

/*
 * This test makes sure that after a failure, the correct group of failed
 * processes is returned from MPIX_Comm_failure_ack/get_acked.
 */
int main(int argc, char **argv)
{
    int rank, size, err, result, i;
    char buf[10] = " No errors";
    char error[MPI_MAX_ERROR_STRING];
    MPI_Group failed_grp, one_grp, world_grp;
    int one[] = { 1 };
    int world_ranks[] = { 0, 1, 2 };
    int failed_ranks[3];

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);
    if (size < 3) {
        fprintf(stderr, "Must run with at least 3 processes\n");
        MPI_Abort(MPI_COMM_WORLD, 1);
    }

    MPI_Comm_set_errhandler(MPI_COMM_WORLD, MPI_ERRORS_RETURN);

    if (rank == 1) {
        exit(EXIT_FAILURE);
    }

    if (rank == 0) {
        err = MPI_Recv(buf, 10, MPI_CHAR, 1, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
        if (MPI_SUCCESS == err) {
            fprintf(stderr, "Expected a failure for receive from rank 1\n");
            MPI_Abort(MPI_COMM_WORLD, 1);
        }

        err = MPIX_Comm_failure_ack(MPI_COMM_WORLD);
        if (MPI_SUCCESS != err) {
            int ec;
            MPI_Error_class(err, &ec);
            MPI_Error_string(err, error, &size);
            fprintf(stderr, "MPIX_Comm_failure_ack returned an error: %d\n%s", ec, error);
            MPI_Abort(MPI_COMM_WORLD, 1);
        }
        err = MPIX_Comm_failure_get_acked(MPI_COMM_WORLD, &failed_grp);
        if (MPI_SUCCESS != err) {
            int ec;
            MPI_Error_class(err, &ec);
            MPI_Error_string(err, error, &size);
            fprintf(stderr, "MPIX_Comm_failure_get_acked returned an error: %d\n%s", ec, error);
            MPI_Abort(MPI_COMM_WORLD, 1);
        }

        MPI_Comm_group(MPI_COMM_WORLD, &world_grp);
        MPI_Group_incl(world_grp, 1, one, &one_grp);
        MPI_Group_compare(one_grp, failed_grp, &result);
        if (MPI_IDENT != result) {
            fprintf(stderr, "First failed group contains incorrect processes\n");
            MPI_Group_size(failed_grp, &size);
            MPI_Group_translate_ranks(failed_grp, size, world_ranks, world_grp, failed_ranks);
            for (i = 0; i < size; i++)
                fprintf(stderr, "DEAD: %d\n", failed_ranks[i]);
            MPI_Abort(MPI_COMM_WORLD, 1);
        }
        MPI_Group_free(&failed_grp);

        err = MPI_Recv(buf, 10, MPI_CHAR, 2, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
        if (MPI_SUCCESS != err) {
            fprintf(stderr, "First receive failed\n");
            MPI_Abort(MPI_COMM_WORLD, 1);
        }
        err = MPI_Recv(buf, 10, MPI_CHAR, 2, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
        if (MPI_SUCCESS == err) {
            fprintf(stderr, "Expected a failure for receive from rank 2\n");
            MPI_Abort(MPI_COMM_WORLD, 1);
        }

        err = MPIX_Comm_failure_get_acked(MPI_COMM_WORLD, &failed_grp);
        if (MPI_SUCCESS != err) {
            int ec;
            MPI_Error_class(err, &ec);
            MPI_Error_string(err, error, &size);
            fprintf(stderr, "MPIX_Comm_failure_get_acked returned an error: %d\n%s", ec, error);
            MPI_Abort(MPI_COMM_WORLD, 1);
        }

        MPI_Group_compare(one_grp, failed_grp, &result);
        if (MPI_IDENT != result) {
            fprintf(stderr, "Second failed group contains incorrect processes\n");
            MPI_Group_size(failed_grp, &size);
            MPI_Group_translate_ranks(failed_grp, size, world_ranks, world_grp, failed_ranks);
            for (i = 0; i < size; i++)
                fprintf(stderr, "DEAD: %d\n", failed_ranks[i]);
            MPI_Abort(MPI_COMM_WORLD, 1);
        }

        fprintf(stdout, " No errors\n");
    } else if (rank == 2) {
        MPI_Ssend(buf, 10, MPI_CHAR, 0, 0, MPI_COMM_WORLD);

        exit(EXIT_FAILURE);
    }

    MPI_Group_free(&failed_grp);
    MPI_Group_free(&one_grp);
    MPI_Group_free(&world_grp);
    MPI_Finalize();
}