File: rmw_perf.c

package info (click to toggle)
armci-mpi 0.3.1~beta-1
  • links: PTS, VCS
  • area: main
  • in suites: bullseye
  • size: 1,792 kB
  • sloc: ansic: 13,028; sh: 243; makefile: 58; fortran: 44
file content (115 lines) | stat: -rw-r--r-- 2,837 bytes parent folder | download | duplicates (5)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
/*
 * Copyright (C) 2010. See COPYRIGHT in top-level directory.
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>
#include <mpi.h>
#include <armci.h>

#ifdef USE_ARMCI_LONG
#  define INC_TYPE long
#  define ARMCI_OP ARMCI_FETCH_AND_ADD_LONG
#else
#  define INC_TYPE int
#  define ARMCI_OP ARMCI_FETCH_AND_ADD
#endif

int main(int argc, char* argv[])
{
    int provided;
#ifdef __bgp__
    MPI_Init_thread(&argc, &argv, MPI_THREAD_MULTIPLE, &provided);
    assert(provided==MPI_THREAD_MULTIPLE);
#else
    MPI_Init_thread(&argc, &argv, MPI_THREAD_SINGLE, &provided);
#endif

    int rank, nproc;
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &nproc);

    if (nproc<2) {
        printf("This benchmark requires >1 MPI processes\n");
        MPI_Finalize();
        return 1;
    }

    ARMCI_Init();

    int count = ( argc > 1 ? atoi(argv[1]) : 1000000 );

    char * cfair = getenv ("CHECK_FAIRNESS");
    int check_fairness = (cfair!=NULL) ? 1 : 0;

    int * complete = (int *) malloc(sizeof(int) * count);
    for(int i=0; i<count; i++) complete[i] = 0;

    void ** base_ptrs = malloc(sizeof(void*)*nproc);
    ARMCI_Malloc(base_ptrs, sizeof(INC_TYPE));

    ARMCI_Access_begin(base_ptrs[rank]);
    *(int*) base_ptrs[rank] = 0;
    ARMCI_Access_end(base_ptrs[rank]);

    ARMCI_Barrier();

    complete = (int *) malloc(sizeof(int) * count);

    if (rank == 0) {
        printf("ARMCI_Rmw Test - in usec \n");
        fflush(stdout);
    }

    MPI_Barrier(MPI_COMM_WORLD);

    double tt = 0;
    int nrecv = 0;
    if (rank>0)
    {
        int target = 0;
        INC_TYPE val = -1;
        double t0 = MPI_Wtime();
        while(val < count) {
            ARMCI_Rmw(ARMCI_OP, &val, base_ptrs[target], 1, target);
            if (val < count) {
                complete[val] = rank;
                nrecv++;
            }
        }
        double t1 = MPI_Wtime();
        tt = (t1-t0);
    }
    MPI_Allreduce(MPI_IN_PLACE, complete, count, MPI_INT, MPI_SUM, MPI_COMM_WORLD);

    double dt = 1.e6*(double)tt/(double)nrecv;
    if(nrecv>0)
        printf("process %d received %d counters in %lf seconds (%lf microseconds per call)\n",
               rank, nrecv, tt, dt);
    fflush(stdout);
    MPI_Barrier(MPI_COMM_WORLD);

    if (0==rank && check_fairness==1) {
        printf("Checking for fairness...\n");
        fflush(stdout);
        for(int i=0; i<count; i++) {
            printf("counter value %d %s received (rank %d) \n", i,
                   0==complete[i] ? "was NOT" : "was",
                   0==complete[i] ? -911 : complete[i] );
        }
        fflush(stdout);
    }
    MPI_Barrier(MPI_COMM_WORLD);

    ARMCI_Free(base_ptrs[rank]);
    free(base_ptrs);

    free(complete);

    ARMCI_Finalize();
    MPI_Finalize();

    return 0;
}