File: btl_smcuda_fifo.h

package info (click to toggle)
openmpi 5.0.9-2
  • links: PTS, VCS
  • area: main
  • in suites: experimental
  • size: 202,076 kB
  • sloc: ansic: 614,605; makefile: 42,348; sh: 11,201; javascript: 9,244; f90: 7,052; java: 6,404; perl: 5,192; python: 1,862; lex: 740; fortran: 61; cpp: 20; tcl: 12
file content (110 lines) | stat: -rw-r--r-- 5,288 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
 * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
 *                         University Research and Technology
 *                         Corporation.  All rights reserved.
 * Copyright (c) 2004-2012 The University of Tennessee and The University
 *                         of Tennessee Research Foundation.  All rights
 *                         reserved.
 * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
 *                         University of Stuttgart.  All rights reserved.
 * Copyright (c) 2004-2005 The Regents of the University of California.
 *                         All rights reserved.
 * Copyright (c) 2006-2007 Voltaire. All rights reserved.
 * Copyright (c) 2009-2010 Cisco Systems, Inc.  All rights reserved.
 * Copyright (c) 2010-2015 Los Alamos National Security, LLC.
 *                         All rights reserved.
 * Copyright (c) 2010-2012 IBM Corporation.  All rights reserved.
 * $COPYRIGHT$
 *
 * Additional copyrights may follow
 *
 * $HEADER$
 */
#ifndef MCA_BTL_SMCUDA_FIFO_H
#define MCA_BTL_SMCUDA_FIFO_H

#include "btl_smcuda.h"
#include "btl_smcuda_endpoint.h"

static void add_pending(struct mca_btl_base_endpoint_t *ep, void *data, bool resend)
{
    btl_smcuda_pending_send_item_t *si;
    opal_free_list_item_t *i;
    i = opal_free_list_get(&mca_btl_smcuda_component.pending_send_fl);

    /* don't handle error for now */
    assert(i != NULL);

    si = (btl_smcuda_pending_send_item_t *) i;
    si->data = data;

    OPAL_THREAD_ADD_FETCH32(&mca_btl_smcuda_component.num_pending_sends, +1);

    /* if data was on pending send list then prepend it to the list to
     * minimize reordering */
    OPAL_THREAD_LOCK(&ep->endpoint_lock);
    if (resend)
        opal_list_prepend(&ep->pending_sends, (opal_list_item_t *) si);
    else
        opal_list_append(&ep->pending_sends, (opal_list_item_t *) si);
    OPAL_THREAD_UNLOCK(&ep->endpoint_lock);
}

/*
 * Sender-rank to receiver-FIFO mapping.
 *
 * FIFO_MAP(x) defines which FIFO on the receiver is used by sender
 * rank x.  The map is a many-to-one hash.
 *
 * FIFO_MAP_NUM(n) defines how many FIFOs the receiver has for
 * n senders.
 *
 * The two are related by
 *
 *      for all    0 <= x < n:
 *
 *              0 <= FIFO_MAP(x) < FIFO_MAP_NUM(n)
 *
 * With a power-of-two nfifos this is satisfied by
 *
 *    FIFO_MAP(x)     = x & (nfifos-1)
 *    FIFO_MAP_NUM(n) = min(nfifos,n)
 *
 * which is what the definitions below compute, reading nfifos from
 * mca_btl_smcuda_component.nfifos.
 *
 * Interesting limits include:
 *
 *    nfifos very large:  each sender has its own dedicated FIFO on
 *       each receiver and the receiver has one FIFO per sender.
 *
 *    nfifos == 1:  all senders use the same FIFO and each receiver
 *       has just one FIFO for all senders.
 *
 * Caveats:
 *
 *    - The bit mask only yields values in [0, nfifos) for every x when
 *      nfifos is a power of two; presumably that is enforced where
 *      nfifos is configured (not visible in this header).
 *
 *    - FIFO_MAP_NUM expands its argument twice, so pass an expression
 *      without side effects.
 */
#define FIFO_MAP(x) ((mca_btl_smcuda_component.nfifos - 1) & (x))
#define FIFO_MAP_NUM(n) \
    ((n) > (mca_btl_smcuda_component.nfifos) ? (mca_btl_smcuda_component.nfifos) : (n))

/*
 * MCA_BTL_SMCUDA_FIFO_WRITE: post a fragment header to a peer's FIFO.
 *
 * Steps, in order:
 *   1. Issue a write memory barrier so stores to the header are complete
 *      before it is published.
 *   2. Select the destination FIFO
 *      mca_btl_smcuda_component.fifo[peer_smp_rank][FIFO_MAP(my_smp_rank)].
 *   3. If retry_pending_sends is nonzero and the endpoint's pending_sends
 *      list is not empty, call btl_smcuda_process_pending_sends() first.
 *   4. Take the FIFO's head_lock and call sm_fifo_write().  On failure the
 *      header is queued with add_pending() and rc is set to
 *      OPAL_ERR_RESOURCE_BUSY; on success the peer is signalled and rc is
 *      set to OPAL_SUCCESS.  head_lock is released afterwards in both
 *      cases (add_pending() therefore runs with head_lock held).
 *
 * Arguments:
 *   endpoint_peer        endpoint of the receiving peer (it is handed to
 *                        add_pending(), i.e. a
 *                        struct mca_btl_base_endpoint_t *)
 *   my_smp_rank          sender rank, fed to FIFO_MAP()
 *   peer_smp_rank        receiver rank, first index into the fifo table
 *   hdr                  value passed to sm_fifo_write() and, on failure,
 *                        to add_pending()
 *   resend               forwarded to add_pending()
 *   retry_pending_sends  nonzero to drain pending sends before writing
 *   rc                   lvalue that receives the result code
 *
 * Macro hygiene: every argument is expanded inside its own delimiters, so
 * expressions such as `eps + 1` are accepted.  endpoint_peer and hdr may
 * still be evaluated more than once; pass expressions without side
 * effects.  The block-local pointer uses a name unlikely to clash with
 * identifiers appearing in the arguments.
 */
#define MCA_BTL_SMCUDA_FIFO_WRITE(endpoint_peer, my_smp_rank, peer_smp_rank, hdr, resend,         \
                                  retry_pending_sends, rc)                                        \
    do {                                                                                          \
        /* memory barrier: ensure writes to the hdr have completed */                             \
        opal_atomic_wmb();                                                                        \
        sm_fifo_t *smcuda_fifo_dst_ =                                                             \
            &(mca_btl_smcuda_component.fifo[(peer_smp_rank)][FIFO_MAP(my_smp_rank)]);             \
                                                                                                  \
        if (retry_pending_sends) {                                                                \
            if (0 < opal_list_get_size(&(endpoint_peer)->pending_sends)) {                        \
                btl_smcuda_process_pending_sends(endpoint_peer);                                  \
            }                                                                                     \
        }                                                                                         \
                                                                                                  \
        opal_atomic_lock(&(smcuda_fifo_dst_->head_lock));                                         \
        /* post fragment */                                                                       \
        if (sm_fifo_write(hdr, smcuda_fifo_dst_) != OPAL_SUCCESS) {                               \
            add_pending(endpoint_peer, hdr, resend);                                              \
            (rc) = OPAL_ERR_RESOURCE_BUSY;                                                        \
        } else {                                                                                  \
            MCA_BTL_SMCUDA_SIGNAL_PEER(endpoint_peer);                                            \
            (rc) = OPAL_SUCCESS;                                                                  \
        }                                                                                         \
        opal_atomic_unlock(&(smcuda_fifo_dst_->head_lock));                                       \
    } while (0)

#endif