1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185
|
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2011-2017 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2011 UT-Battelle, LLC. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "btl_ugni_rdma.h"
#include "btl_ugni_smsg.h"
/*
 * Turn an alignment requirement into a mask of its low-order bits:
 * x - 1 for a non-zero x, and 0 when x is 0. The argument is expanded
 * more than once, so it must be free of side effects.
 * (Borrowed from osc_rdma_comm.h.)
 */
#define ALIGNMENT_MASK(x) ((0 != (x)) ? ((x) - 1) : 0)
/**
 * Post a get of size bytes from remote_address into local_address.
 *
 * The request is rejected with OPAL_ERR_NOT_AVAILABLE when the remote
 * address, the local address or the size has any bit set under the mask
 * derived from btl_get_alignment, or when size exceeds btl_get_limit.
 * Otherwise the arguments are forwarded to mca_btl_ugni_post() and its
 * return value is returned unchanged.
 *
 * The btl and flags arguments are not referenced by this function.
 */
int mca_btl_ugni_get(mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t *endpoint,
                     void *local_address, uint64_t remote_address,
                     mca_btl_base_registration_handle_t *local_handle,
                     mca_btl_base_registration_handle_t *remote_handle, size_t size, int flags,
                     int order, mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext,
                     void *cbdata)
{
    /* OR the three quantities together so one mask test covers all of them */
    const bool misaligned = 0 != ((remote_address | (uint64_t)(intptr_t) local_address | size)
                                  & ALIGNMENT_MASK(mca_btl_ugni_module.super.btl_get_alignment));
    const bool too_large = size > mca_btl_ugni_module.super.btl_get_limit;

    if (OPAL_UNLIKELY(misaligned || too_large)) {
        BTL_VERBOSE(("RDMA/FMA Get not available due to size or alignment restrictions"));

        /* let the caller know that get cannot be used for this request */
        return OPAL_ERR_NOT_AVAILABLE;
    }

    BTL_VERBOSE(("Using RDMA/FMA Get %lu bytes to local address %p to remote address %" PRIx64,
                 (unsigned long) size, local_address, remote_address));

    return mca_btl_ugni_post(endpoint, true, size, local_address, remote_address, local_handle,
                             remote_handle, order, cbfunc, cbcontext, cbdata);
}
/* eager get */
static void
mca_btl_ugni_callback_eager_get_progress_pending(struct mca_btl_base_module_t *btl,
struct mca_btl_base_endpoint_t *endpoint,
struct mca_btl_base_descriptor_t *desc, int rc)
{
mca_btl_ugni_module_t *ugni_module = (mca_btl_ugni_module_t *) btl;
mca_btl_ugni_base_frag_t *pending_frag, *frag = (mca_btl_ugni_base_frag_t *) desc;
memset(&frag->hdr, 0, sizeof(frag->hdr));
OPAL_THREAD_LOCK(&ugni_module->eager_get_pending_lock);
pending_frag = (mca_btl_ugni_base_frag_t *) opal_list_remove_first(
&ugni_module->eager_get_pending);
OPAL_THREAD_UNLOCK(&ugni_module->eager_get_pending_lock);
if (NULL != pending_frag) {
/* copy the relevant data out of the pending fragment */
frag->endpoint = pending_frag->endpoint;
assert(frag != pending_frag);
/* start the next eager get using this fragment */
(void) mca_btl_ugni_start_eager_get(frag->endpoint, pending_frag->hdr.eager_ex, frag);
/* return the temporary fragment */
mca_btl_ugni_frag_return(pending_frag);
} else {
/* not needed anymore */
mca_btl_ugni_frag_return(frag);
}
}
/**
 * Completion callback for the get posted by mca_btl_ugni_start_eager_get().
 *
 * context is the fragment that was passed to the post. Its stored eager
 * header supplies the tag (upper 8 bits of send.lag), the combined length
 * (lower 24 bits of send.lag) and the payload size. The difference between
 * the combined length and the payload size is the length of the header
 * portion held in hdr.eager_ex.pml_header; the payload itself is at
 * local_address.
 *
 * The data is handed to the active-message callback registered for the tag
 * as one segment (payload only) or two (header, then payload). Afterwards
 * the fragment is turned into a response and an
 * MCA_BTL_UGNI_TAG_RDMA_COMPLETE message is sent to the peer. If the send
 * returns a negative value the fragment is put on the wait list instead.
 *
 * The local_handle, cbdata and status arguments are not examined here.
 */
static void mca_btl_ugni_callback_eager_get(struct mca_btl_base_module_t *btl,
                                            struct mca_btl_base_endpoint_t *endpoint,
                                            void *local_address,
                                            mca_btl_base_registration_handle_t *local_handle,
                                            void *context, void *cbdata, int status)
{
    mca_btl_ugni_module_t *ugni_module = (mca_btl_ugni_module_t *) btl;
    mca_btl_ugni_base_frag_t *frag = (mca_btl_ugni_base_frag_t *) context;
    /* send.lag packs the length in its low 24 bits and the tag in its high 8 bits */
    uint32_t len = frag->hdr.eager.send.lag & 0x00ffffff;
    uint8_t tag = frag->hdr.eager.send.lag >> 24;
    size_t payload_len = frag->hdr.eager.size;
    size_t hdr_len = len - payload_len;
    mca_btl_active_message_callback_t *reg = mca_btl_base_active_message_trigger + tag;
    mca_btl_base_segment_t segs[2];
    /* segs is filled in below, before the descriptor is handed to the callback */
    const mca_btl_base_receive_descriptor_t tmp = {.endpoint = endpoint,
                                                   .des_segments = segs,
                                                   .des_segment_count = hdr_len ? 2 : 1,
                                                   .tag = tag,
                                                   .cbdata = reg->cbdata};
    int rc;

    BTL_VERBOSE(("eager get for rem_ctx %p complete", frag->hdr.eager.ctx));

    if (hdr_len) {
        /* header bytes come from the fragment header, payload from the get target */
        segs[0].seg_addr.pval = frag->hdr.eager_ex.pml_header;
        segs[0].seg_len = hdr_len;

        segs[1].seg_addr.pval = local_address;
        segs[1].seg_len = payload_len;
    } else {
        segs[0].seg_addr.pval = local_address;
        segs[0].seg_len = payload_len;
    }

    reg->cbfunc(&ugni_module->super, &tmp);

    /* fill in the response header */
    frag->hdr.rdma.ctx = frag->hdr.eager.ctx;
    frag->flags = MCA_BTL_UGNI_FRAG_RESPONSE;
    frag->ref_cnt = 1;

    /* once complete use this fragment for a pending eager get if any exist */
    frag->base.des_cbfunc = mca_btl_ugni_callback_eager_get_progress_pending;

    /* tell the remote peer the operation is complete */
    rc = opal_mca_btl_ugni_smsg_send(frag, &frag->hdr.rdma, sizeof(frag->hdr.rdma), NULL, 0,
                                     MCA_BTL_UGNI_TAG_RDMA_COMPLETE);
    if (OPAL_UNLIKELY(0 > rc)) {
        /* the send could not be issued now; queue the fragment on the wait list */
        mca_btl_ugni_wait_list_append(ugni_module, endpoint, frag);
    }
}
/**
 * Start (or queue) the get that fetches the data described by an eager
 * header.
 *
 * If frag is NULL a fragment is obtained with
 * mca_btl_ugni_frag_alloc_eager_recv(). When that fails, a fragment from
 * mca_btl_ugni_frag_alloc_rdma_int() is used only to hold a copy of hdr and
 * is appended to the module's eager_get_pending list without posting.
 *
 * Otherwise the header is copied into the fragment and a get of the payload
 * size rounded up to a multiple of 4 is posted with
 * mca_btl_ugni_callback_eager_get() as its completion callback. If the post
 * does not return OPAL_SUCCESS the fragment is appended to the pending list
 * as well.
 *
 * @return OPAL_SUCCESS when the get was posted or when no post was attempted
 *         and the request was queued; otherwise the value returned by
 *         mca_btl_ugni_post().
 */
int mca_btl_ugni_start_eager_get(mca_btl_base_endpoint_t *endpoint,
                                 mca_btl_ugni_eager_ex_frag_hdr_t hdr,
                                 mca_btl_ugni_base_frag_t *frag)
{
    mca_btl_ugni_module_t *module = mca_btl_ugni_ep_btl(endpoint);
    int status = OPAL_SUCCESS;
    size_t padded_size;

    BTL_VERBOSE(("starting eager get for remote ctx: %p", hdr.eager.ctx));

    if (NULL == frag) {
        /* try to allocate a registered buffer */
        frag = mca_btl_ugni_frag_alloc_eager_recv(endpoint);
        if (OPAL_UNLIKELY(NULL == frag)) {
            /* no registered buffer right now: park the header in a small
             * fragment and retry later from the pending list */
            frag = mca_btl_ugni_frag_alloc_rdma_int(endpoint);

            /* nothing sensible can be done if even this allocation fails */
            assert(NULL != frag);

            frag->hdr.eager_ex = hdr;
            goto queue_pending;
        }
    }

    frag->flags = 0;
    frag->hdr.eager_ex = hdr;

    /* round the size up to a multiple of 4 bytes (required for get on Gemini) */
    padded_size = (hdr.eager.size + 3) & ~3;

    /* make sure the completion callback always fires */
    frag->base.des_flags = MCA_BTL_DES_SEND_ALWAYS_CALLBACK;

    /* post the get */
    status = mca_btl_ugni_post(endpoint, true, padded_size, frag->base.super.ptr,
                               hdr.eager.address, &frag->memory_handle,
                               &hdr.eager.memory_handle, MCA_BTL_NO_ORDER,
                               mca_btl_ugni_callback_eager_get, frag, NULL);
    /* NOTE(review): success is marked as the unlikely outcome here; confirm the hint is intended */
    if (OPAL_UNLIKELY(OPAL_SUCCESS == status)) {
        return OPAL_SUCCESS;
    }

queue_pending:
    OPAL_THREAD_LOCK(&module->eager_get_pending_lock);
    opal_list_append(&module->eager_get_pending, (opal_list_item_t *) frag);
    OPAL_THREAD_UNLOCK(&module->eager_get_pending_lock);

    return status;
}
|