1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204
|
/*
* Copyright (c) 2016-2017 Intel Corporation. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#ifndef _OFI_PROTO_H_
#define _OFI_PROTO_H_
#include "config.h"
#include <stdint.h>
#include <stddef.h>
#include <rdma/fi_rma.h>
#ifdef __cplusplus
extern "C" {
#endif
/* Control header version; the value expected in ofi_ctrl_hdr::version. */
#define OFI_CTRL_VERSION 2
/*
 * Control message types, stored in ofi_ctrl_hdr::type.
 *
 * Every value is written out explicitly so the number behind each type is
 * visible at a glance and does not shift if entries are ever reordered.
 */
enum {
	ofi_ctrl_connreq	= 0,
	ofi_ctrl_connresp	= 1,
	ofi_ctrl_start_data	= 2,
	ofi_ctrl_data		= 3,
	ofi_ctrl_large_data	= 4,
	ofi_ctrl_ack		= 5,
	ofi_ctrl_nack		= 6,
	ofi_ctrl_discard	= 7,
	ofi_ctrl_seg_data	= 8,
	ofi_ctrl_atomic		= 9,
	ofi_ctrl_atomic_resp	= 10,
};
/*
* Control message header. For segmentation and reassembly, reliability,
* rendezvous protocol, acks, and communication setup.
*
* version: OFI_CTRL_VERSION
* type
* conn_id: Communication identifier. Conn_id values are exchanged between
* peer endpoints as part of communication setup. This field is valid
* as part of the first message in any data transfer.
* msg_id: This is the sender's identifier for a message.
* Unique number identifying all segments of a message
* Message id can be formed using an equation similar to:
* (seq_no++ << tx size) | tx_key
* seg_size:
* Data packets - size of current message, in bytes.
* Large data packets - size of current message, 2 ^ seg_size, in bytes
* Ctrl packets - number of segments in window allowed past seg_no.
* seg_no:
* Data packets - position 0..(n-1) of segment in current message.
* Ctrl packets - last segment ack'ed.
* conn_data: Connection specific data. This may be set to the index
* of the transmit endpoint's address in its local AV, which may
* be used as a hint at the Rx side to locate the Tx EP address in
* its AV. The assumption is that all addresses were inserted into
* all AVs across the fabric using a copied array. (This is an
* optimization hint only; the peer validates the actual entry.)
* rx_key: This is the receiver's identifier for a message (receive side
* equivalent of msg_id). Key returned by the Rx side, that the
* Tx side includes in subsequent packets. This field is used for
* rendezvous protocol.
* The rx_key may be formed similar to message_id.
* ctrl_data: This is provider specific data for remote side
*/
/*
 * Control message header. Member order and widths define the header
 * layout, so they must not be rearranged.
 */
struct ofi_ctrl_hdr {
	uint8_t		version;	/* OFI_CTRL_VERSION */
	uint8_t		type;		/* one of the ofi_ctrl_* types */
	uint16_t	seg_size;	/* size or window; meaning depends on packet kind */
	uint32_t	seg_no;		/* segment position (data) or last segment ack'ed (ctrl) */
	uint64_t	conn_id;	/* communication identifier exchanged at setup */
	uint64_t	msg_id;		/* sender's identifier for the message */

	/* The three members below alias the same 64 bits. */
	union {
		uint64_t	conn_data;	/* connection specific data */
		uint64_t	rx_key;		/* receiver's identifier for the message */
		uint64_t	ctrl_data;	/* provider specific data for the remote side */
	};
};
/* Command header version; the value expected in ofi_op_hdr::version. */
#define OFI_OP_VERSION 2
/*
 * Basic command opcodes, stored in ofi_op_hdr::op.
 *
 * The intent is that the receiver can use opcode + control as indices into
 * a function pointer array for message processing (after validating the
 * values).
 *
 * Opcode values are written out explicitly. ofi_op_max is deliberately left
 * implicit so it is always one past the last opcode listed before it.
 */
enum {
	ofi_op_msg		= 0,
	ofi_op_tagged		= 1,
	ofi_op_read_req		= 2,
	ofi_op_read_rsp		= 3,
	ofi_op_write		= 4,
	ofi_op_write_async	= 5,
	ofi_op_atomic		= 6,
	ofi_op_atomic_fetch	= 7,
	ofi_op_atomic_compare	= 8,
	ofi_op_read_async	= 9,
	ofi_op_max,
};
/*
 * Single-bit flags; each one occupies its own bit so they can be OR-ed
 * together.
 * NOTE(review): presumably carried in ofi_op_hdr::flags -- confirm.
 */
#define OFI_REMOTE_CQ_DATA	0x1	/* bit 0 */
#define OFI_TRANSMIT_COMPLETE	0x2	/* bit 1 */
#define OFI_DELIVERY_COMPLETE	0x4	/* bit 2 */
#define OFI_COMMIT_COMPLETE	0x8	/* bit 3 */
/*
* Common command header
*
* version: OFI_OP_VERSION
* rx_index: RX index for scalable endpoints
* op: Basic command opcode (one of the ofi_op_* values)
* op_data: implementation specific
* tx_key: Tx request identifier for command (not a member of the struct below)
* flags: Command flags
* size: Size of data transfer
* data: Remote CQ data, if available
* tag: Message tag, used for tagged operations only
* iov_count: Count of destination iov, used for RMA operations
* atomic: Control fields for atomic operations
* remote_idx: Tx request identifier of remote side
* resv: Reserved, used for msg operations
*/
/*
 * Common command header. Member order and widths define the header layout,
 * so they must not be rearranged.
 */
struct ofi_op_hdr {
	uint8_t		version;	/* OFI_OP_VERSION */
	uint8_t		rx_index;	/* RX index for scalable endpoints */
	uint8_t		op;		/* basic command opcode (ofi_op_*) */
	uint8_t		op_data;	/* implementation specific */
	uint32_t	flags;		/* command flags */
	uint64_t	size;		/* size of data transfer */
	uint64_t	data;		/* remote CQ data, if available */

	/* All members below alias the same 64-bit slot. */
	union {
		uint64_t	tag;		/* message tag, tagged operations only */
		uint8_t		iov_count;	/* count of destination iov, RMA operations */
		struct {
			uint8_t	datatype;
			uint8_t	op;
			uint8_t	ioc_count;
		} atomic;			/* control fields for atomic operations */
		uint64_t	remote_idx;	/* Tx request identifier of remote side */
		uint64_t	resv;		/* reserved, used for msg operations */
	};
};
/*
 * Address/length pair, both 64 bits wide.
 * NOTE(review): presumably describes one contiguous buffer segment with len
 * in bytes -- confirm against the users of this struct.
 */
struct ofi_iov {
	uint64_t	addr;
	uint64_t	len;
};
/*
 * RMA iov entry: address, length and key, each 64 bits wide.
 * NOTE(review): key is presumably the access key of the remote memory
 * region -- confirm against the users of this struct.
 */
struct ofi_rma_iov {
	uint64_t	addr;
	uint64_t	len;
	uint64_t	key;
};
/*
 * RMA ioc entry: same shape as struct ofi_rma_iov, but carrying a count
 * instead of a length.
 * NOTE(review): count is presumably a number of datatype elements (atomic
 * operations) rather than bytes -- confirm against the users of this struct.
 */
struct ofi_rma_ioc {
	uint64_t	addr;
	uint64_t	count;
	uint64_t	key;
};
/* Size of one command, in bytes. */
#define OFI_CMD_SIZE 64 /* to align with 64-byte cache line */
/*
 * Bytes left in an OFI_CMD_SIZE command once a struct ofi_ctrl_hdr has been
 * accounted for. The expression involves sizeof, so it has type size_t.
 */
#define OFI_CMD_DATA_LEN (OFI_CMD_SIZE - sizeof(struct ofi_ctrl_hdr))
#ifdef __cplusplus
}
#endif
#endif /* _OFI_PROTO_H_ */
|