File: pt2ptf2.c

package info (click to toggle)
mpich 4.3.0%2Breally4.2.1-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, trixie
  • size: 419,120 kB
  • sloc: ansic: 1,215,557; cpp: 74,755; javascript: 40,763; f90: 20,649; sh: 18,463; xml: 14,418; python: 14,397; perl: 13,772; makefile: 9,279; fortran: 8,063; java: 4,553; asm: 324; ruby: 176; lisp: 19; php: 8; sed: 4
file content (97 lines) | stat: -rw-r--r-- 3,017 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
/*
 * Copyright (C) by Argonne National Laboratory
 *     See COPYRIGHT in top-level directory
 */

#include "mpi.h"
#include <stdio.h>
#include <stdlib.h>
#include "mpitest.h"

/*
static char MTEST_Descrip[] = "Test error reporting from faults with point to point communication";
*/

/* Print the MPI error class and message text for errcode to stderr, labelled
 * with the operation name; always returns 1 (defined at the end of the file). */
int ReportErr(int errcode, const char name[]);

/*
 * Test error reporting from faults with point-to-point communication.
 *
 * MPI_COMM_WORLD is split into two groups: world ranks 1..wsize/2 "fault"
 * (they call exit(1) after a barrier), while world rank 0 and ranks
 * wsize/2+1..wsize-1 survive.  The survivors then check that
 *   - point-to-point traffic inside their own communicator still works, and
 *   - a synchronous send to each faulted process returns an error.
 *
 * Both communicators use MPI_ERRORS_RETURN so failures are reported as
 * return codes instead of aborting the job.
 *
 * Returns MTestReturnValue(errs), where errs is this process's error count.
 */
int main(int argc, char *argv[])
{
    int wrank, wsize, rank, size, color;
    int j, tmp;
    int err, errs = 0;
    /* Initialized so MTest_Finalize never reads an indeterminate value,
     * even if the final reduction fails. */
    int toterrs = 0;
    MPI_Comm newcomm;

    MTest_Init(&argc, &argv);

    MPI_Comm_size(MPI_COMM_WORLD, &wsize);
    MPI_Comm_rank(MPI_COMM_WORLD, &wrank);

    /* Color is 0 or 1; 1 will be the processes that "fault" */
    /* process 0 and wsize/2+1...wsize-1 are in non-faulting group */
    color = (wrank > 0) && (wrank <= wsize / 2);
    MPI_Comm_split(MPI_COMM_WORLD, color, wrank, &newcomm);

    MPI_Comm_size(newcomm, &size);
    MPI_Comm_rank(newcomm, &rank);

    /* Set errors return on COMM_WORLD and the new comm */
    MPI_Comm_set_errhandler(MPI_COMM_WORLD, MPI_ERRORS_RETURN);
    MPI_Comm_set_errhandler(newcomm, MPI_ERRORS_RETURN);

    err = MPI_Barrier(MPI_COMM_WORLD);
    if (err) {
        errs += ReportErr(err, "Barrier");
    }
    if (color) {
        /* Simulate a fault on some processes */
        exit(1);
    } else {
        /* To improve the chance that the "faulted" processes will have
         * exited, wait for 1 second */
        MTestSleep(1);
    }

    /* Can we still use newcomm?  Each survivor receives one message from
     * every lower-ranked survivor, then sends one to every higher-ranked
     * survivor. */
    for (j = 0; j < rank; j++) {
        err = MPI_Recv(&tmp, 1, MPI_INT, j, 0, newcomm, MPI_STATUS_IGNORE);
        if (err) {
            errs += ReportErr(err, "Recv");
        }
    }
    for (j = rank + 1; j < size; j++) {
        err = MPI_Send(&rank, 1, MPI_INT, j, 0, newcomm);
        if (err) {
            /* Label the failure with the call that actually failed. */
            errs += ReportErr(err, "Send");
        }
    }

    /* Now, try sending in MPI_COMM_WORLD on dead processes */
    /* There is a race condition here - we don't know for sure that the faulted
     * processes have exited.  However, we can ensure a failure by using
     * synchronous sends - the sender will wait until the receiver
     * receives the message, which will not happen (the process will exit
     * without matching the message, even if it has not yet exited). */
    for (j = 1; j <= wsize / 2; j++) {
        err = MPI_Ssend(&rank, 1, MPI_INT, j, 0, MPI_COMM_WORLD);
        if (!err) {
            /* Success is the failure case here: the target is gone. */
            errs++;
            fprintf(stderr, "Ssend succeeded to dead process %d\n", j);
        }
    }

    /* Sum the error counts over the surviving processes only. */
    err = MPI_Allreduce(&errs, &toterrs, 1, MPI_INT, MPI_SUM, newcomm);
    if (err) {
        errs += ReportErr(err, "Allreduce");
        /* The reduction result cannot be trusted; fall back to the local
         * count so the failure is still reported. */
        toterrs = errs;
    }
    MPI_Comm_free(&newcomm);

    MTest_Finalize(toterrs);

    return MTestReturnValue(errs);
}

/*
 * Report an MPI error on stderr.
 *
 *   errcode - error code returned by an MPI call
 *   name    - label for the operation that failed; printed verbatim
 *
 * Looks up the error class and the message text for errcode and prints them
 * together with name.  Always returns 1, so the result can be added directly
 * to an error counter.
 */
int ReportErr(int errcode, const char name[])
{
    char text[MPI_MAX_ERROR_STRING];
    int text_len;
    int err_class;

    MPI_Error_class(errcode, &err_class);
    MPI_Error_string(errcode, text, &text_len);

    fprintf(stderr, "In %s, error code %d(class %d) = %s\n",
            name, errcode, err_class, text);

    return 1;
}