/*
 * Copyright (C) by Argonne National Laboratory
 * See COPYRIGHT in top-level directory
 */
/* modified 01/23/2011 by Jim Hoekstra - ISU
* changed test to follow mtest_init/mtest_finalize convention
* The following changes are based on suggestions from Chris Sadlo:
* variable row changed to col.
* manual transpose - code added to perform 'swap'.
* MPI_Send/MPI_Recv involving xpose changed.
*/
/* This is based on an example in the MPI standard and a bug report submitted
by Alexandr Konovalov of Intel */
#include "mpi.h"
#include <stdio.h>
#include "mpitest.h"
#define SIZE 100
#define ITER 100
/*
 * Datatype performance test; must be run with exactly two processes.
 *
 * Rank 0 sends a SIZE x SIZE matrix of doubles to rank 1 in three timed
 * phases of ITER messages each:
 *   t   - contiguous send, received through the derived datatype "xpose"
 *         so that the matrix is stored transposed in b;
 *   ts  - plain byte transfer with no transpose (only printed, never
 *         compared);
 *   tst - plain byte transfer followed by a hand-written in-place
 *         transpose of b.
 * Rank 1 counts an error if t is more than twice tst, and also if the
 * marker element did not arrive in its transposed position after the
 * first phase.
 *
 * The error count is handed to MTest_Finalize and the process exit status
 * is whatever MTestReturnValue(errs) yields (both declared in mpitest.h).
 */
int main(int argc, char *argv[])
{
    int i, j, k;
    /* static storage: two 100x100 double matrices are kept off the stack */
    static double a[SIZE][SIZE], b[SIZE][SIZE];
    /* MPI counts are int; make the size_t -> int conversion explicit */
    const int nbytes = (int) sizeof(a);
    double t1, t2, t, ts, tst;
    double temp;
    int myrank, mysize, errs = 0;
    MPI_Status status;
    MPI_Aint sizeofreal, tmp_lb;
    MPI_Datatype col, xpose;

    MTest_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &myrank);
    MPI_Comm_size(MPI_COMM_WORLD, &mysize);
    if (mysize != 2) {
        fprintf(stderr, "This test must be run with 2 processes\n");
        MPI_Abort(MPI_COMM_WORLD, 1);
    }

    /* col picks one double out of every SIZE, i.e. one column of a
     * row-major SIZE x SIZE matrix; xpose is SIZE such columns, each
     * shifted by one double, so a row-major stream received through
     * xpose ends up as b[j][i] = a[i][j]. */
    MPI_Type_get_extent(MPI_DOUBLE, &tmp_lb, &sizeofreal);
    MPI_Type_vector(SIZE, 1, SIZE, MPI_DOUBLE, &col);
    MPI_Type_create_hvector(SIZE, 1, sizeofreal, col, &xpose);
    /* only xpose is used in communication, so only xpose is committed */
    MPI_Type_commit(&xpose);

    /* Preset the arrays so that they're in memory */
    for (i = 0; i < SIZE; i++) {
        for (j = 0; j < SIZE; j++) {
            a[i][j] = 0;
            b[i][j] = 0;
        }
    }
    /* single off-diagonal marker used to verify the transpose below */
    a[SIZE - 1][0] = 1;

    /* Time the transpose example */
    MPI_Barrier(MPI_COMM_WORLD);
    t1 = MPI_Wtime();
    for (i = 0; i < ITER; i++) {
        if (myrank == 0) {
            MPI_Send(&a[0][0], SIZE * SIZE, MPI_DOUBLE, 1, 0, MPI_COMM_WORLD);
        } else {
            MPI_Recv(&b[0][0], 1, xpose, 0, 0, MPI_COMM_WORLD, &status);
        }
    }
    t2 = MPI_Wtime();
    t = (t2 - t1) / ITER;

    /* Check the data, outside the timed region: the marker sent from
     * a[SIZE - 1][0] must now be at b[0][SIZE - 1], and its original
     * position must hold 0. */
    if (myrank == 1 && (b[0][SIZE - 1] != 1 || b[SIZE - 1][0] != 0)) {
        errs++;
        fprintf(stderr,
                "Datatype transpose produced wrong data: b[0][%d] = %f, b[%d][0] = %f\n",
                SIZE - 1, b[0][SIZE - 1], SIZE - 1, b[SIZE - 1][0]);
    }

    /* Time sending the same amount of data, but without the transpose */
    MPI_Barrier(MPI_COMM_WORLD);
    t1 = MPI_Wtime();
    for (i = 0; i < ITER; i++) {
        if (myrank == 0) {
            MPI_Send(&a[0][0], nbytes, MPI_BYTE, 1, 0, MPI_COMM_WORLD);
        } else {
            MPI_Recv(&b[0][0], nbytes, MPI_BYTE, 0, 0, MPI_COMM_WORLD, &status);
        }
    }
    t2 = MPI_Wtime();
    ts = (t2 - t1) / ITER;

    /* Time sending the same amount of data, with the transpose done
     * as a separate step */
    MPI_Barrier(MPI_COMM_WORLD);
    t1 = MPI_Wtime();
    for (k = 0; k < ITER; k++) {
        if (myrank == 0) {
            MPI_Send(&a[0][0], nbytes, MPI_BYTE, 1, 0, MPI_COMM_WORLD);
        } else {
            MPI_Recv(&b[0][0], nbytes, MPI_BYTE, 0, 0, MPI_COMM_WORLD, &status);
            /* in-place transpose: swap each element above the diagonal
             * with its mirror; diagonal elements stay where they are */
            for (i = 0; i < SIZE; i++) {
                for (j = i + 1; j < SIZE; j++) {
                    temp = b[j][i];
                    b[j][i] = b[i][j];
                    b[i][j] = temp;
                }
            }
        }
    }
    t2 = MPI_Wtime();
    tst = (t2 - t1) / ITER;

    /* Print out the results */
    if (myrank == 1) {
        /* if t and tst are too different, then there is a performance
         * problem in the handling of the datatypes */
        if (t > 2 * tst) {
            errs++;
            fprintf(stderr,
                    "Transpose time with datatypes is more than twice time without datatypes\n");
            fprintf(stderr, "%f\t%f\t%f\n", t, ts, tst);
        }
    }

    MPI_Type_free(&col);
    MPI_Type_free(&xpose);

    MTest_Finalize(errs);
    return MTestReturnValue(errs);
}
|