File: btl_ugni_send.c

package info (click to toggle)
openmpi 5.0.8-4
  • links: PTS, VCS
  • area: main
  • in suites:
  • size: 201,684 kB
  • sloc: ansic: 613,078; makefile: 42,353; sh: 11,194; javascript: 9,244; f90: 7,052; java: 6,404; perl: 5,179; python: 1,859; lex: 740; fortran: 61; cpp: 20; tcl: 12
file content (201 lines) | stat: -rw-r--r-- 7,457 bytes parent folder | download | duplicates (5)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
/* -*- Mode: C; c-basic-offset:3 ; indent-tabs-mode:nil -*- */
/*
 * Copyright (c) 2011-2017 Los Alamos National Security, LLC. All rights
 *                         reserved.
 * Copyright (c) 2011      UT-Battelle, LLC. All rights reserved.
 * Copyright (c) 2014      Research Organization for Information Science
 *                         and Technology (RIST). All rights reserved.
 * Copyright (c) 2017      Intel, Inc.  All rights reserved.
 * $COPYRIGHT$
 *
 * Additional copyrights may follow
 *
 * $HEADER$
 */

#include "btl_ugni.h"
#include "btl_ugni_frag.h"
#include "btl_ugni_prepare.h"
#include "btl_ugni_smsg.h"

/**
 * Queue a fragment on an endpoint's wait list.
 *
 * The fragment is flagged so its callback is always invoked, then appended to
 * endpoint->frag_wait_list under the endpoint lock. If the endpoint is in the
 * connected state and is not yet marked as wait-listed it is also appended to
 * the module's ep_wait_list.
 *
 * @param ugni_module  module owning the list of endpoints with waiting fragments
 * @param endpoint     endpoint the fragment is destined for
 * @param frag         fragment to queue
 */
void mca_btl_ugni_wait_list_append(mca_btl_ugni_module_t *ugni_module,
                                   mca_btl_base_endpoint_t *endpoint,
                                   mca_btl_ugni_base_frag_t *frag)
{
    BTL_VERBOSE(("wait-listing fragment %p to %s. endpoint state %d\n", (void *) frag,
                 OPAL_NAME_PRINT(endpoint->peer_proc->proc_name), endpoint->state));

    /* a queued fragment always reports completion through its callback */
    frag->base.des_flags |= MCA_BTL_DES_SEND_ALWAYS_CALLBACK;

    /* put the fragment at the tail of this endpoint's pending list */
    OPAL_THREAD_LOCK(&endpoint->lock);
    opal_list_append(&endpoint->frag_wait_list, (opal_list_item_t *) frag);
    OPAL_THREAD_UNLOCK(&endpoint->lock);

    /* nothing more to do if the endpoint is already listed or is not connected */
    if (endpoint->wait_listed || MCA_BTL_UGNI_EP_STATE_CONNECTED != endpoint->state) {
        return;
    }

    /* the flag was read without the list lock above, so look at it again now
     * that the lock is held before touching the module-level list */
    OPAL_THREAD_LOCK(&ugni_module->ep_wait_list_lock);
    if (!endpoint->wait_listed) {
        opal_list_append(&ugni_module->ep_wait_list, &endpoint->super);
        endpoint->wait_listed = true;
    }
    OPAL_THREAD_UNLOCK(&ugni_module->ep_wait_list_lock);
}

/**
 * Send a prepared descriptor to a peer.
 *
 * The fragment is placed on the endpoint's wait list when the endpoint is not
 * ready, when fragments are already waiting on it, or when the send runs out of
 * resources. Otherwise the fragment is sent immediately.
 *
 * @param btl         BTL module (used as a mca_btl_ugni_module_t)
 * @param endpoint    destination endpoint
 * @param descriptor  descriptor to send (used as a mca_btl_ugni_base_frag_t)
 * @param tag         tag stored above bit 24 of the send header word
 *
 * @return 1 if the send is reported complete before returning, OPAL_SUCCESS if
 *         the fragment was wait-listed, otherwise the value returned by
 *         mca_btl_ugni_send_frag().
 */
int mca_btl_ugni_send(struct mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t *endpoint,
                      struct mca_btl_base_descriptor_t *descriptor, mca_btl_base_tag_t tag)
{
    mca_btl_ugni_base_frag_t *frag = (mca_btl_ugni_base_frag_t *) descriptor;
    size_t size = frag->segments[0].seg_len + frag->segments[1].seg_len;
    mca_btl_ugni_module_t *ugni_module = (mca_btl_ugni_module_t *) btl;
    int rc;

    /* tag and len are at the same location in eager and smsg frag hdrs. the tag
     * is widened to an unsigned 32-bit value before shifting: shifting the
     * promoted (signed) int is undefined once the tag has its top bit set. */
    frag->hdr.send.lag = ((uint32_t) tag << 24) | size;

    /* size is a size_t; cast so the argument matches the PRIu64 conversion */
    BTL_VERBOSE(("btl/ugni sending descriptor %p from %d -> %d. length = %" PRIu64,
                 (void *) descriptor, OPAL_PROC_MY_NAME.vpid, endpoint->peer_proc->proc_name.vpid,
                 (uint64_t) size));

    /* queue behind anything already waiting, or if the endpoint is not ready */
    rc = mca_btl_ugni_check_endpoint_state(endpoint);
    if (OPAL_UNLIKELY(OPAL_SUCCESS != rc || opal_list_get_size(&endpoint->frag_wait_list))) {
        mca_btl_ugni_wait_list_append(ugni_module, endpoint, frag);
        return OPAL_SUCCESS;
    }

    /* add a reference to prevent the fragment from being returned until after the
     * completion flag is checked. */
    ++frag->ref_cnt;
    frag->flags &= ~MCA_BTL_UGNI_FRAG_COMPLETE;

    rc = mca_btl_ugni_send_frag(endpoint, frag);
    if (OPAL_LIKELY(mca_btl_ugni_frag_check_complete(frag))) {
        /* fast path: remote side has received the frag */
        (void) mca_btl_ugni_frag_del_ref(frag, OPAL_SUCCESS);

        return 1;
    }

    /* the MCA_BTL_DES_* bits are kept in base.des_flags (that is where this file
     * sets MCA_BTL_DES_SEND_ALWAYS_CALLBACK); frag->flags only carries the
     * MCA_BTL_UGNI_FRAG_* bits. test and clear the descriptor bits on the
     * descriptor flag word rather than on frag->flags. */
    if ((OPAL_SUCCESS == rc) && (frag->flags & MCA_BTL_UGNI_FRAG_BUFFERED)
        && (frag->base.des_flags & MCA_BTL_DES_FLAGS_BTL_OWNERSHIP)) {
        /* fast(ish) path: btl owned buffered frag. report send as complete */
        bool call_callback = !!(frag->base.des_flags & MCA_BTL_DES_SEND_ALWAYS_CALLBACK);

        /* clear the bit so the callback is not requested a second time when the
         * last reference to the fragment is dropped */
        frag->base.des_flags &= ~MCA_BTL_DES_SEND_ALWAYS_CALLBACK;

        if (call_callback) {
            frag->base.des_cbfunc(&ugni_module->super, frag->endpoint, &frag->base, rc);
        }

        (void) mca_btl_ugni_frag_del_ref(frag, OPAL_SUCCESS);

        return 1;
    }

    /* slow(ish) path: remote side hasn't received the frag. call the frag's callback when
       we get the local smsg/msgq or remote rdma completion */
    frag->base.des_flags |= MCA_BTL_DES_SEND_ALWAYS_CALLBACK;

    /* drop the reference taken before the send */
    mca_btl_ugni_frag_del_ref(frag, OPAL_SUCCESS);

    if (OPAL_UNLIKELY(OPAL_ERR_OUT_OF_RESOURCE == rc)) {
        /* queue up request; it is retried from the wait list, so report success */
        mca_btl_ugni_wait_list_append(ugni_module, endpoint, frag);
        rc = OPAL_SUCCESS;
    }

    return rc;
}

/**
 * Immediate send: allocate a fragment, copy in the header (and pack the
 * payload, if any), and try to send it right away.
 *
 * @param btl           BTL module
 * @param endpoint      destination endpoint
 * @param convertor     convertor used to pack the payload when payload_size != 0
 * @param header        header bytes copied into the fragment's first segment
 * @param header_size   number of header bytes
 * @param payload_size  number of payload bytes
 * @param order         ordering value passed through to fragment preparation
 * @param flags         descriptor flags; MCA_BTL_DES_FLAGS_BTL_OWNERSHIP is added
 * @param tag           tag stored above bit 24 of the send header word
 * @param descriptor    optional out parameter. On failure it receives the
 *                      prepared fragment's descriptor, or NULL when no
 *                      fragment was prepared.
 *
 * @return OPAL_SUCCESS if the fragment was handed to mca_btl_ugni_send_frag()
 *         successfully, OPAL_ERR_OUT_OF_RESOURCE otherwise.
 */
int mca_btl_ugni_sendi(struct mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t *endpoint,
                       struct opal_convertor_t *convertor, void *header, size_t header_size,
                       size_t payload_size, uint8_t order, uint32_t flags, mca_btl_base_tag_t tag,
                       mca_btl_base_descriptor_t **descriptor)
{
    size_t total_size = header_size + payload_size;
    mca_btl_ugni_base_frag_t *frag = NULL;
    size_t packed_size = payload_size;
    int rc;

    /* fragments are already waiting on this endpoint: refuse the immediate send
     * (presumably so this message cannot overtake the queued ones) */
    if (OPAL_UNLIKELY(opal_list_get_size(&endpoint->frag_wait_list))) {
        if (NULL != descriptor) {
            *descriptor = NULL;
        }
        return OPAL_ERR_OUT_OF_RESOURCE;
    }

    do {
        /* total_size is a size_t; cast so the argument matches PRIu64 */
        BTL_VERBOSE(("btl/ugni isend sending fragment from %d -> %d. length = %" PRIu64
                     " endpoint state %d",
                     OPAL_PROC_MY_NAME.vpid, endpoint->peer_proc->proc_name.vpid,
                     (uint64_t) total_size, endpoint->state));

        flags |= MCA_BTL_DES_FLAGS_BTL_OWNERSHIP;

        if (0 == payload_size) {
            frag = (mca_btl_ugni_base_frag_t *)
                mca_btl_ugni_prepare_src_send_nodata(btl, endpoint, order, header_size, flags);
        } else {
            frag = (mca_btl_ugni_base_frag_t *)
                mca_btl_ugni_prepare_src_send_buffered(btl, endpoint, convertor, order, header_size,
                                                       &packed_size, flags);
        }

        assert(packed_size == payload_size);
        if (OPAL_UNLIKELY(NULL == frag
                          || OPAL_SUCCESS != mca_btl_ugni_check_endpoint_state(endpoint))) {
            break;
        }

        /* widen the tag before shifting: shifting the promoted (signed) int is
         * undefined once the tag has its top bit set */
        frag->hdr.send.lag = ((uint32_t) tag << 24) | total_size;
        memcpy(frag->segments[0].seg_addr.pval, header, header_size);

        rc = mca_btl_ugni_send_frag(endpoint, frag);
        if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
            break;
        }

        return OPAL_SUCCESS;
    } while (0);

    /* hand the prepared fragment (if there is one) back to the caller. frag is
     * still NULL when preparation failed, so do not form &frag->base from it. */
    if (NULL != descriptor) {
        *descriptor = (NULL != frag) ? &frag->base : NULL;
    }
    /* NOTE(review): when descriptor is NULL a fragment prepared above is not
     * handed to anyone on this path -- confirm callers always supply descriptor,
     * or release the fragment here. */

    return OPAL_ERR_OUT_OF_RESOURCE;
}

/**
 * Drain an endpoint's list of waiting fragments.
 *
 * Fragments are taken from the head of endpoint->frag_wait_list one at a time
 * and sent. The loop stops when the list is empty or when a send returns a
 * value below OPAL_SUCCESS.
 *
 * @param endpoint  endpoint whose wait list is processed
 *
 * @return OPAL_SUCCESS once the list is empty, otherwise the failing send's
 *         return code. On OPAL_ERR_OUT_OF_RESOURCE the fragment is put back at
 *         the head of the list; on any other failure the fragment is completed
 *         with that code.
 */
int mca_btl_ugni_progress_send_wait_list(mca_btl_base_endpoint_t *endpoint)
{
    for (;;) {
        mca_btl_ugni_base_frag_t *pending;
        int status;

        /* pop the oldest waiting fragment */
        OPAL_THREAD_LOCK(&endpoint->lock);
        pending = (mca_btl_ugni_base_frag_t *) opal_list_remove_first(&endpoint->frag_wait_list);
        OPAL_THREAD_UNLOCK(&endpoint->lock);

        if (NULL == pending) {
            /* list drained */
            break;
        }

        if (OPAL_UNLIKELY(pending->flags & MCA_BTL_UGNI_FRAG_RESPONSE)) {
            /* response fragments carry only the rdma header */
            status = opal_mca_btl_ugni_smsg_send(pending, &pending->hdr.rdma,
                                                 sizeof(pending->hdr.rdma), NULL, 0,
                                                 MCA_BTL_UGNI_TAG_RDMA_COMPLETE);
        } else {
            status = mca_btl_ugni_send_frag(endpoint, pending);
        }

        if (OPAL_LIKELY(OPAL_SUCCESS <= status)) {
            /* sent; move on to the next waiting fragment */
            continue;
        }

        if (OPAL_LIKELY(OPAL_ERR_OUT_OF_RESOURCE == status)) {
            /* put it back at the head so it is the first one retried */
            OPAL_THREAD_LOCK(&endpoint->lock);
            opal_list_prepend(&endpoint->frag_wait_list, (opal_list_item_t *) pending);
            OPAL_THREAD_UNLOCK(&endpoint->lock);
        } else {
            /* any other failure: complete the fragment with the error */
            mca_btl_ugni_frag_complete(pending, status);
        }

        return status;
    }

    return OPAL_SUCCESS;
}