File: ping-pong.c

package info (click to toggle)
armci-mpi 0.0~git20160222-2
  • links: PTS
  • area: main
  • in suites: stretch
  • size: 1,756 kB
  • sloc: ansic: 12,698; sh: 229; makefile: 53; fortran: 44
file content (114 lines) | stat: -rw-r--r-- 3,061 bytes parent folder | download | duplicates (7)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
/*
 * Copyright (C) 2010. See COPYRIGHT in top-level directory.
 */

#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <stdint.h>

#include <mpi.h>
#include <armci.h>

// Size in bytes of the send and receive buffers; also the largest message
// length exercised by the benchmark loop.
#define MAX_SIZE   262144
// Number of ping-pong rounds reported per message size; the timing loop runs
// twice this many iterations and the elapsed time is divided by this value.
#define NUM_ROUNDS 1000

/*
 * ARMCI ping-pong latency benchmark.
 *
 * Process 0 is paired in turn with every other rank.  For each pair and for
 * each power-of-two message length up to MAX_SIZE, the two processes bounce a
 * message back and forth with ARMCI_Put: on even iterations process 0 sends
 * and the partner waits, on odd iterations the roles are swapped.  The
 * receiver detects arrival by polling the first and last byte of its own
 * receive buffer until both are non-zero.  Process 0 prints the elapsed time
 * per round in microseconds.
 *
 * Compile with -DDIRECT_ACCESS to access the local receive buffer with plain
 * loads/stores instead of ARMCI_Put/ARMCI_Get addressed to the local rank.
 *
 * Returns 0 on completion; setup failures are reported through ARMCI_Error.
 */
int main(int argc, char **argv) {
  int       me, nproc, target;
  int       msg_length, round, i;
  double    t_start, t_stop;
  uint8_t   zero = 0;  // One-byte source used to clear the arrival markers
  uint8_t  *snd_buf;   // Send buffer (byte array)
  uint8_t **rcv_buf;   // Receive buffer addresses, indexed by rank

  MPI_Init(&argc, &argv);
  ARMCI_Init();

  MPI_Comm_rank(MPI_COMM_WORLD, &me);
  MPI_Comm_size(MPI_COMM_WORLD, &nproc);

  if (nproc < 2) {
    ARMCI_Error("This benchmark should be run on at least two processes", 1);
  }

  if (me == 0) {
    printf("ARMCI ping-pong latency test, performing %d rounds at each xfer size.\n", NUM_ROUNDS);
  }

  rcv_buf = malloc(nproc * sizeof *rcv_buf);
  if (rcv_buf == NULL) {
    ARMCI_Error("Unable to allocate the receive buffer address table", 1);
  }

  if (ARMCI_Malloc((void **) rcv_buf, MAX_SIZE) != 0) {
    ARMCI_Error("ARMCI_Malloc failed for the receive buffer", 1);
  }

  snd_buf = ARMCI_Malloc_local(MAX_SIZE);
  if (snd_buf == NULL) {
    ARMCI_Error("ARMCI_Malloc_local failed for the send buffer", 1);
  }

  // The receiver polls for non-zero marker bytes, so start from an all-zero
  // receive buffer instead of relying on whatever the allocator returned.
  // The barrier at the top of the timing loop runs before any remote put.
#ifdef DIRECT_ACCESS
  for (i = 0; i < MAX_SIZE; i++) {
    rcv_buf[me][i] = 0;
  }
#else
  for (i = 0; i < MAX_SIZE; i++) {
    snd_buf[i] = 0;
  }
  ARMCI_Put(snd_buf, rcv_buf[me], MAX_SIZE, me);
#endif

  // Every payload byte is non-zero so that both markers flip on arrival
  for (i = 0; i < MAX_SIZE; i++) {
    snd_buf[i] = 1;
  }

  for (target = 1; target < nproc; target++) {
    if (me == 0) printf("\n========== Process pair: %d and %d ==========\n\n", 0, target);

    for (msg_length = 1; msg_length <= MAX_SIZE; msg_length *= 2) {
      ARMCI_Barrier();
      t_start = MPI_Wtime();

      if (me == 0 || me == target) {
        int my_target = me == 0 ? target : 0;

        // Perform NUM_ROUNDS ping-pongs (two one-way transfers per round)
        for (round = 0; round < NUM_ROUNDS*2; round++) {
          // Process 0 sends on even iterations, its partner on odd ones.
          // The role depends only on parity, never on the partner's rank,
          // so pairs with target >= 2 behave the same as the pair (0, 1).
          int i_am_sender = (round % 2 == 0) == (me == 0);

          if (i_am_sender) {
            // Clear start and end markers for next round
#ifdef DIRECT_ACCESS
            ((uint8_t*)rcv_buf[me])[0] = 0;
            ((uint8_t*)rcv_buf[me])[msg_length-1] = 0;
#else
            ARMCI_Put(&zero, &(((uint8_t*)rcv_buf[me])[0]),            1, me);
            ARMCI_Put(&zero, &(((uint8_t*)rcv_buf[me])[msg_length-1]), 1, me);
#endif

            ARMCI_Put(snd_buf, rcv_buf[my_target], msg_length, my_target);
            ARMCI_Fence(my_target); // This is optional, we don't need notification
          }

          // I am the receiver: wait until both ends of the message are in
          else {
#ifdef DIRECT_ACCESS
            while (((volatile uint8_t*)rcv_buf[me])[0] == 0) ;
            while (((volatile uint8_t*)rcv_buf[me])[msg_length-1] == 0) ;
#else
            uint8_t val;

            do {
              ARMCI_Get(&(((uint8_t*)rcv_buf[me])[0]), &val, 1, me);
            } while (val == 0);

            do {
              ARMCI_Get(&(((uint8_t*)rcv_buf[me])[msg_length-1]), &val, 1, me);
            } while (val == 0);
#endif
          }
        }
      }

      ARMCI_Barrier(); // FIXME: Time here increases with nproc :(
      t_stop = MPI_Wtime();

      if (me == 0) {
        printf("%8d bytes \t %12.8f us\n", msg_length, (t_stop-t_start)/NUM_ROUNDS*1.0e6);
      }
    }

    ARMCI_Barrier();
  }

  ARMCI_Free(rcv_buf[me]);
  free(rcv_buf);
  ARMCI_Free_local(snd_buf);

  ARMCI_Finalize();
  MPI_Finalize();

  return 0;
}