File: simple.c

package info (click to toggle)
ga 5.9.2-2
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 18,472 kB
  • sloc: ansic: 192,963; fortran: 53,761; f90: 11,218; cpp: 5,784; makefile: 2,248; sh: 1,945; python: 1,734; perl: 534; csh: 134; asm: 106
file content (70 lines) | stat: -rw-r--r-- 2,033 bytes parent folder | download | duplicates (9)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
#if HAVE_CONFIG_H
#   include "config.h"
#endif

/*$id$*/
#include <stdio.h>
#include <stdlib.h>

#include <mpi.h>

#include "armci.h"
#include "message.h"

/* Rank of this process and number of processes in MPI_COMM_WORLD; both are
 * filled in by main() via MPI_Comm_rank() / MPI_Comm_size(). */
int me, nprocs;
/* Number of get operations issued per measured pass; main() also uses it as
 * a factor in the byte count passed to ARMCI_Malloc(). */
int LOOP = 10;

/*
 * Get-latency micro-benchmark.
 *
 * For each of NUM_SEGMENTS collectively allocated ARMCI segments, every rank
 * fetches LOOP doubles, one element per call, from rank (me + 1) % nprocs:
 *   1. an untimed pass of blocking ARMCI_Get() calls,
 *   2. a timed pass of blocking ARMCI_Get() calls,
 *   3. a pass of ARMCI_NbGet() + ARMCI_Wait() where issue time and wait time
 *      are accumulated separately.
 * Every rank prints its own per-call averages in microseconds.
 *
 * argc/argv are forwarded to armci_msg_init() and ARMCI_Init_args().
 * Returns 0 on completion; allocation failures are reported through
 * ARMCI_Error().
 */
int main(int argc, char **argv)
{
  enum { NUM_SEGMENTS = 10, DOUBLES_PER_LOOP_STEP = 400000 };
  int k, i;
  double **myptrs[NUM_SEGMENTS];
  double t0, t1, t2, tnbget, tnbwait;

  armci_msg_init(&argc, &argv);
  /* NOTE(review): ARMCI_Init_args() is called twice here and ARMCI_Finalize()
   * twice at the end, exactly as in the original; presumably this exercises
   * repeated initialization -- confirm before collapsing to a single call. */
  ARMCI_Init_args(&argc, &argv);
  ARMCI_Init_args(&argc, &argv);
  MPI_Comm_rank(MPI_COMM_WORLD, &me);
  MPI_Comm_size(MPI_COMM_WORLD, &nprocs);

  for (k = 0; k < NUM_SEGMENTS; k++) {
    const int next = (me + 1) % nprocs;
    double *local;
    double *remote;

    /* One slot per rank; ARMCI_Malloc() fills the table in. */
    myptrs[k] = malloc(sizeof *myptrs[k] * nprocs);
    if (myptrs[k] == NULL) {
      /* ARMCI_Error() is expected to abort the job rather than return. */
      ARMCI_Error("simple: malloc of pointer table failed", k);
    }
    if (ARMCI_Malloc((void **)myptrs[k],
                     DOUBLES_PER_LOOP_STEP * LOOP * sizeof(double)) != 0) {
      ARMCI_Error("simple: ARMCI_Malloc failed", k);
    }
    local = myptrs[k][me];
    remote = myptrs[k][next];

    for (i = 0; i < LOOP; i++) {
      local[i] = me + 0.414;
    }
    MPI_Barrier(MPI_COMM_WORLD);

    /* Untimed pass.  The fetched values are not verified: each rank writes
     * into the same elements that another rank may be reading as its source,
     * so the observed contents depend on timing. */
    for (i = 0; i < LOOP; i++) {
      ARMCI_Get(remote + i, local + i, sizeof(double), next);
    }

    /* Timed blocking gets: one timestamp pair around the whole pass. */
    t0 = MPI_Wtime();
    for (i = 0; i < LOOP; i++) {
      ARMCI_Get(remote + i, local + i, sizeof(double), next);
    }
    t1 = MPI_Wtime();
    printf("\nGet Latency=%f\n", 1e6 *(t1 - t0) / LOOP);
    fflush(stdout);

    /* Non-blocking gets: issue time and wait time are summed separately and
     * restart from zero for every segment. */
    tnbget = 0;
    tnbwait = 0;
    for (i = 0; i < LOOP; i++) {
      armci_hdl_t nbh;
      ARMCI_INIT_HANDLE(&nbh);
      t0 = MPI_Wtime();
      ARMCI_NbGet(remote + i, local + i, sizeof(double), next, &nbh);
      t1 = MPI_Wtime();
      ARMCI_Wait(&nbh);
      t2 = MPI_Wtime();
      tnbget += (t1 - t0);
      tnbwait += (t2 - t1);
    }
    printf("\nNb Get Latency=%f Nb Wait=%f\n", 1e6 * tnbget / LOOP, 1e6 * tnbwait / LOOP);
    fflush(stdout);
    MPI_Barrier(MPI_COMM_WORLD);
  }

  for (k = 0; k < NUM_SEGMENTS; k++) {
    ARMCI_Free(myptrs[k][me]);
    /* Release the local pointer table as well; it was previously leaked. */
    free(myptrs[k]);
    myptrs[k] = NULL;
  }
  MPI_Barrier(MPI_COMM_WORLD);
  ARMCI_Finalize();
  ARMCI_Finalize();
  armci_msg_finalize();
  return 0;
}