1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67
|
/*
* Copyright (C) by Argonne National Laboratory
* See COPYRIGHT in top-level directory
*/
#include "mpi.h"
#include <stdio.h>
#include <stdlib.h>
#include "mpitest.h"
/*
static char MTEST_Descrip[] = "Test error reporting from faults with collective communication";
*/
/*
 * Print a description of an MPI error code to stderr: the code itself, its
 * error class and its message text, prefixed with the caller-supplied name.
 * Always returns 1.
 */
int ReportErr(int errcode, const char name[]);
int main(int argc, char *argv[])
{
int wrank, wsize, rank, size, color;
int tmp, errs = 0;
MPI_Comm newcomm;
MTest_Init(&argc, &argv);
MPI_Comm_size(MPI_COMM_WORLD, &wsize);
MPI_Comm_rank(MPI_COMM_WORLD, &wrank);
/* Color is 0 or 1; 1 will be the processes that "fault" */
/* process 0 and wsize/2+1...wsize-1 are in non-faulting group */
color = (wrank > 0) && (wrank <= wsize / 2);
MPI_Comm_split(MPI_COMM_WORLD, color, wrank, &newcomm);
MPI_Comm_size(newcomm, &size);
MPI_Comm_rank(newcomm, &rank);
/* Set errors return on COMM_WORLD and the new comm */
MPI_Comm_set_errhandler(MPI_COMM_WORLD, MPI_ERRORS_RETURN);
MPI_Comm_set_errhandler(newcomm, MPI_ERRORS_RETURN);
MPI_Barrier(MPI_COMM_WORLD);
if (color) {
/* Simulate a fault on some processes */
exit(1);
}
/* Can we still use newcomm? */
MPI_Allreduce(&rank, &tmp, 1, MPI_INT, MPI_SUM, newcomm);
if (tmp != (size * (size + 1)) / 2) {
printf("Allreduce gave %d but expected %d\n", tmp, (size * (size + 1)) / 2);
errs++;
}
MPI_Comm_free(&newcomm);
MTest_Finalize(0);
return MTestReturnValue(errs);
}
/*
 * Describe an MPI error code on stderr.
 *
 *   errcode - MPI error code to look up
 *   name    - label printed at the front of the message
 *
 * Looks up the error class and the message string for errcode and prints
 * both, together with the raw code, on a single line.  Returns 1
 * unconditionally.
 */
int ReportErr(int errcode, const char name[])
{
    char text[MPI_MAX_ERROR_STRING];
    int text_len;
    int err_class;

    MPI_Error_class(errcode, &err_class);
    MPI_Error_string(errcode, text, &text_len);

    fprintf(stderr, "In %s, error code %d(class %d) = %s\n",
            name, errcode, err_class, text);

    return 1;
}
|