1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364
|
/* -*- Mode: C; c-basic-offset:4 ; -*- */
/*
* Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2009 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2006 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2006 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2009 Oak Ridge National Labs. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#ifndef OPAL_CONVERTOR_H_HAS_BEEN_INCLUDED
#define OPAL_CONVERTOR_H_HAS_BEEN_INCLUDED
#include "opal_config.h"
#include <stddef.h>
#ifdef HAVE_SYS_TYPES_H
#include <sys/types.h>
#endif
#ifdef HAVE_SYS_UIO_H
#include <sys/uio.h>
#endif
#ifdef HAVE_NET_UIO_H
#include <net/uio.h>
#endif
#include "opal/constants.h"
#include "opal/datatype/opal_datatype.h"
#include "opal/prefetch.h"
BEGIN_C_DECLS
/*
* CONVERTOR SECTION
*/
/*
 * Bit layout of opal_convertor_t::flags.
 *
 * The low-order 16 bits (CONVERTOR_DATATYPE_MASK) are kept free for the
 * datatype flags; OPAL_DATATYPE_FLAG_* values are OR-ed into and tested
 * against the same flags word by the inline helpers of this header.
 * The convertor's own flags therefore start at bit 16.
 */
#define CONVERTOR_DATATYPE_MASK 0x0000FFFF
/* Convertor flags located in bits 16-23; CONVERTOR_TYPE_MASK covers this byte. */
#define CONVERTOR_SEND_CONVERSION 0x00010000
#define CONVERTOR_RECV 0x00020000
#define CONVERTOR_SEND 0x00040000
#define CONVERTOR_HOMOGENEOUS 0x00080000
#define CONVERTOR_NO_OP 0x00100000
#define CONVERTOR_WITH_CHECKSUM 0x00200000
#define CONVERTOR_TYPE_MASK 0x00FF0000
/* Convertor flags located in bits 24-27, i.e. outside of CONVERTOR_TYPE_MASK. */
#define CONVERTOR_STATE_START 0x01000000
#define CONVERTOR_STATE_COMPLETE 0x02000000
#define CONVERTOR_STATE_ALLOC 0x04000000
/* Set by opal_convertor_cleanup and by opal_convertor_set_position once the
 * requested position reaches local_size; cleared when the position moves back. */
#define CONVERTOR_COMPLETED 0x08000000
/* Forward declaration; used only as an opaque pointer target by
 * opal_datatype_dump_stack in this header. */
union dt_elem_desc;
typedef struct opal_convertor_t opal_convertor_t;
/*
 * Signature of the function stored in opal_convertor_t::fAdvance, i.e. the
 * pack/unpack routine attached to a convertor. opal_convertor_pack and
 * opal_convertor_unpack are declared with the same parameter list.
 */
typedef int32_t (*convertor_advance_fct_t)( opal_convertor_t* pConvertor,
struct iovec* iov,
uint32_t* out_size,
size_t* max_data );
/*
 * Memory allocation callback type. No other declaration visible in this
 * header refers to it.
 * NOTE(review): presumably returns a buffer and may adjust *pLength to the
 * size actually provided -- confirm against the users of this type.
 */
typedef void*(*memalloc_fct_t)( size_t* pLength, void* userdata );
/* The master convertor struct (defined in convertor_internal.h) */
struct opal_convertor_master_t;
/*
 * Element type of the convertor stack (opal_convertor_t::pStack and
 * opal_convertor_t::static_stack). The field order defines the memory
 * layout of every convertor and must not be changed casually.
 */
struct dt_stack_t {
int32_t index; /**< index in the element description */
int16_t type; /**< the type used for the last pack/unpack (original or OPAL_DATATYPE_UINT1) */
size_t count; /**< number of times we still have to do it */
OPAL_PTRDIFF_TYPE disp; /**< actual displacement depending on the count field */
};
typedef struct dt_stack_t dt_stack_t;
/**
 * Number of dt_stack_t entries embedded directly in every convertor
 * (opal_convertor_t::static_stack). opal_convertor_cleanup treats any
 * stack_size larger than this value as a separately allocated stack and
 * releases it with free().
 */
#define DT_STATIC_STACK_SIZE 5 /**< This should be sufficient for most applications */
/*
 * The convertor object. The fields up to `master` describe the conversion
 * itself; the fields after it are, according to the marker comment below,
 * modified by every pack/unpack call.
 * NOTE(review): the cacheline/size annotations look like the output of a
 * struct layout tool for one particular build; they are not verifiable from
 * this header alone.
 */
struct opal_convertor_t {
opal_object_t super; /**< basic superclass */
uint32_t remoteArch; /**< the remote architecture */
uint32_t flags; /**< the properties of this convertor */
size_t local_size; /**< overall length data on local machine, compared to bConverted */
size_t remote_size; /**< overall length data on remote machine, compared to bConverted */
const opal_datatype_t* pDesc; /**< the datatype description associated with the convertor */
const dt_type_desc_t* use_desc; /**< the version used by the convertor (normal or optimized) */
opal_datatype_count_t count; /**< the total number of full datatype elements */
uint32_t stack_size; /**< size of the allocated stack */
/* --- cacheline 1 boundary (64 bytes) --- */
unsigned char* pBaseBuf; /**< initial buffer as supplied by the user */
dt_stack_t* pStack; /**< the local stack for the actual conversion */
convertor_advance_fct_t fAdvance; /**< pointer to the pack/unpack functions */
struct opal_convertor_master_t* master; /**< the master convertor */
/* All other fields get modified for every call to pack/unpack functions */
uint32_t stack_pos; /**< the actual position on the stack */
uint32_t partial_length; /**< amount of data left over from the last unpack */
size_t bConverted; /**< # of bytes already converted */
uint32_t checksum; /**< checksum computed by pack/unpack operation */
uint32_t csum_ui1; /**< partial checksum computed by pack/unpack operation */
size_t csum_ui2; /**< partial checksum computed by pack/unpack operation */
/* --- cacheline 2 boundary (128 bytes) --- */
dt_stack_t static_stack[DT_STATIC_STACK_SIZE]; /**< local stack for small datatypes */
/* --- cacheline 3 boundary (192 bytes) was 56 bytes ago --- */
/* size: 248, cachelines: 4, members: 20 */
/* last cacheline: 56 bytes */
};
/* Class declaration for opal_convertor_t, whose first member is the
 * opal_object_t superclass. */
OPAL_DECLSPEC OBJ_CLASS_DECLARATION( opal_convertor_t );
/*
*
*/
static inline uint32_t opal_convertor_get_checksum( opal_convertor_t* convertor )
{
return convertor->checksum;
}
/**
 * Pack entry point of the convertor. The parameter list is identical to
 * convertor_advance_fct_t, the type of opal_convertor_t::fAdvance.
 *
 * @param pConv     convertor describing the data to pack
 * @param iov       array of iovec entries
 * @param out_size  pointer to an iovec count
 *                  NOTE(review): presumably in = entries available,
 *                  out = entries used -- confirm in the implementation
 * @param max_data  pointer to a byte count
 *                  NOTE(review): presumably in = limit, out = bytes packed
 *                  -- confirm in the implementation
 * @return an int32_t status; the convention is not visible in this header.
 */
OPAL_DECLSPEC int32_t opal_convertor_pack( opal_convertor_t* pConv, struct iovec* iov,
uint32_t* out_size, size_t* max_data );
/**
 * Unpack entry point of the convertor. The parameter list is identical to
 * convertor_advance_fct_t, the type of opal_convertor_t::fAdvance.
 *
 * @param pConv     convertor describing the destination data
 * @param iov       array of iovec entries
 * @param out_size  pointer to an iovec count
 *                  NOTE(review): in/out semantics to be confirmed in the
 *                  implementation
 * @param max_data  pointer to a byte count
 *                  NOTE(review): in/out semantics to be confirmed in the
 *                  implementation
 * @return an int32_t status; the convention is not visible in this header.
 */
OPAL_DECLSPEC int32_t opal_convertor_unpack( opal_convertor_t* pConv, struct iovec* iov,
uint32_t* out_size, size_t* max_data );
/**
 * Create a convertor.
 *
 * @param remote_arch  architecture of the remote peer
 * @param mode         NOTE(review): meaning not visible in this header
 * @return pointer to the new convertor.
 *         NOTE(review): ownership/release rules and the failure value are
 *         defined by the implementation -- confirm there.
 */
OPAL_DECLSPEC opal_convertor_t* opal_convertor_create( int32_t remote_arch, int32_t mode );
/**
 * Reset a convertor to the state it has right after opal_convertor_construct.
 *
 * Because of this guarantee a PML may OBJ_DESTRUCT the convertors of a
 * request without calling OBJ_CONSTRUCT again every time the request is taken
 * back from the cache; OBJ_CONSTRUCT is only needed when the request is first
 * created.
 *
 * A stack larger than the embedded one is released and the convertor is
 * switched back to static_stack; the datatype pointer and stack position are
 * cleared and the flags are reset to
 * OPAL_DATATYPE_FLAG_NO_GAPS | CONVERTOR_COMPLETED.
 *
 * @param convertor  convertor to reset
 * @return always OPAL_SUCCESS.
 */
static inline int opal_convertor_cleanup( opal_convertor_t* convertor )
{
    /* Scalar state first: none of it depends on the stack handling below. */
    convertor->flags     = OPAL_DATATYPE_FLAG_NO_GAPS | CONVERTOR_COMPLETED;
    convertor->stack_pos = 0;
    convertor->pDesc     = NULL;

    /* Only a stack that outgrew the embedded array was allocated separately. */
    if( OPAL_UNLIKELY(convertor->stack_size > DT_STATIC_STACK_SIZE) ) {
        free( convertor->pStack );
        convertor->stack_size = DT_STATIC_STACK_SIZE;
        convertor->pStack     = convertor->static_stack;
    }

    return OPAL_SUCCESS;
}
/**
 * Tell whether the data handled by the convertor must go through an
 * intermediate (packed) buffer.
 *
 * @param pConvertor  convertor to inspect
 * @return 0 when no packing is required, i.e. the upper layer can use the
 *           pointer to the contiguous user buffer directly;
 *         1 when the data has to be packed: heterogeneous peers
 *           (source arch != dest arch, only checked when
 *           OPAL_ENABLE_HETEROGENEOUS_SUPPORT is enabled) or a non
 *           contiguous memory layout.
 */
static inline int32_t opal_convertor_need_buffers( const opal_convertor_t* pConvertor )
{
    const uint32_t conv_flags = pConvertor->flags;

#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT
    if( OPAL_UNLIKELY(0 == (conv_flags & CONVERTOR_HOMOGENEOUS)) ) {
        return 1;
    }
#endif
    /* Data without gaps can always be used in place. */
    if( conv_flags & OPAL_DATATYPE_FLAG_NO_GAPS ) {
        return 0;
    }
    /* A single element of a contiguous datatype can be used in place too. */
    if( (1 == pConvertor->count) && (conv_flags & OPAL_DATATYPE_FLAG_CONTIGUOUS) ) {
        return 0;
    }
    return 1;
}
/**
 * Report the packed size of the data handled by the convertor.
 *
 * @param pConv  convertor to query
 * @param pSize  [OUT] receives the convertor's local_size
 */
static inline void opal_convertor_get_packed_size( const opal_convertor_t* pConv,
                                                   size_t* pSize )
{
    const size_t packed_bytes = pConv->local_size;

    *pSize = packed_bytes;
}
/**
 * Report the unpacked size of the data handled by the convertor.
 *
 * @param pConv  convertor to query
 * @param pSize  [OUT] receives the convertor's remote_size
 */
static inline void opal_convertor_get_unpacked_size( const opal_convertor_t* pConv,
                                                     size_t* pSize )
{
    const size_t unpacked_bytes = pConv->remote_size;

    *pSize = unpacked_bytes;
}
/**
 * Compute the absolute address at which the next pack/unpack will operate.
 * Mostly useful for contiguous datatypes, when the caller needs the pointer
 * to the contiguous piece of memory.
 *
 * The address is the user buffer (pBaseBuf) advanced by the number of bytes
 * already converted and by the true lower bound of the datatype. The
 * datatype pointer is dereferenced unconditionally.
 *
 * @param pConv     convertor to query
 * @param position  [OUT] receives the computed address
 */
static inline void opal_convertor_get_current_pointer( const opal_convertor_t* pConv,
                                                       void** position )
{
    unsigned char* cursor = pConv->pBaseBuf;

    cursor += pConv->bConverted;       /* skip what has been converted so far */
    cursor += pConv->pDesc->true_lb;   /* account for the datatype's true lower bound */
    *position = (void*)cursor;
}
/**
 * Prepare a convertor for a send operation.
 * opal_convertor_copy_and_prepare_for_send calls this function after having
 * copied remoteArch, flags and master from a source convertor.
 *
 * @param convertor  convertor to prepare
 * @param datatype   datatype describing one element
 * @param count      number of elements
 * @param pUserBuf   user buffer holding the data
 * @return an int32_t status; the convention is not visible in this header.
 */
OPAL_DECLSPEC int32_t opal_convertor_prepare_for_send( opal_convertor_t* convertor,
const struct opal_datatype_t* datatype,
int32_t count,
const void* pUserBuf);
/**
 * Initialize `convertor` from a source convertor and prepare it for a send.
 *
 * The remote architecture and the master are copied from pSrcConv, the flags
 * are the source flags OR-ed with `flags`; the rest of the setup is delegated
 * to opal_convertor_prepare_for_send.
 *
 * @param pSrcConv   convertor providing remoteArch, flags and master
 * @param datatype   datatype describing one element
 * @param count      number of elements
 * @param pUserBuf   user buffer holding the data
 * @param flags      extra flags OR-ed into the copied flags
 * @param convertor  [OUT] convertor being prepared
 * @return the value returned by opal_convertor_prepare_for_send.
 */
static inline int32_t opal_convertor_copy_and_prepare_for_send( const opal_convertor_t* pSrcConv,
                                                                const struct opal_datatype_t* datatype,
                                                                int32_t count,
                                                                const void* pUserBuf,
                                                                int32_t flags,
                                                                opal_convertor_t* convertor )
{
    convertor->master     = pSrcConv->master;
    convertor->flags      = (pSrcConv->flags | flags);
    convertor->remoteArch = pSrcConv->remoteArch;

    return opal_convertor_prepare_for_send( convertor, datatype, count, pUserBuf );
}
/**
 * Prepare a convertor for a receive operation.
 * opal_convertor_copy_and_prepare_for_recv calls this function after having
 * copied remoteArch, flags and master from a source convertor.
 *
 * @param convertor  convertor to prepare
 * @param datatype   datatype describing one element
 * @param count      number of elements
 * @param pUserBuf   user buffer receiving the data
 * @return an int32_t status; the convention is not visible in this header.
 */
OPAL_DECLSPEC int32_t opal_convertor_prepare_for_recv( opal_convertor_t* convertor,
const struct opal_datatype_t* datatype,
int32_t count,
const void* pUserBuf );
/**
 * Initialize `convertor` from a source convertor and prepare it for a receive.
 *
 * The remote architecture and the master are copied from pSrcConv, the flags
 * are the source flags OR-ed with `flags`; the rest of the setup is delegated
 * to opal_convertor_prepare_for_recv.
 *
 * @param pSrcConv   convertor providing remoteArch, flags and master
 * @param datatype   datatype describing one element
 * @param count      number of elements
 * @param pUserBuf   user buffer receiving the data
 * @param flags      extra flags OR-ed into the copied flags
 * @param convertor  [OUT] convertor being prepared
 * @return the value returned by opal_convertor_prepare_for_recv.
 */
static inline int32_t opal_convertor_copy_and_prepare_for_recv( const opal_convertor_t* pSrcConv,
                                                                const struct opal_datatype_t* datatype,
                                                                int32_t count,
                                                                const void* pUserBuf,
                                                                int32_t flags,
                                                                opal_convertor_t* convertor )
{
    convertor->master     = pSrcConv->master;
    convertor->flags      = pSrcConv->flags | flags;
    convertor->remoteArch = pSrcConv->remoteArch;

    return opal_convertor_prepare_for_recv( convertor, datatype, count, pUserBuf );
}
/*
 * Give access to the raw memory layout based on the datatype.
 *
 * The direction of each argument is given by the annotations on the
 * parameters below.
 * NOTE(review): the return convention is not visible in this header.
 */
OPAL_DECLSPEC int32_t
opal_convertor_raw( opal_convertor_t* convertor, /* [IN/OUT] */
struct iovec* iov, /* [IN/OUT] */
uint32_t* iov_count, /* [IN/OUT] */
size_t* length ); /* [OUT] */
/*
 * Move the convertor to *position without the boundary and fast-path checks
 * performed by opal_convertor_set_position, which falls back to this function
 * once its own checks are done.
 *
 * Upper level does not need to call the _nocheck function directly.
 */
OPAL_DECLSPEC int32_t
opal_convertor_set_position_nocheck( opal_convertor_t* convertor,
size_t* position );
/**
 * Move the convertor to the byte position *position.
 *
 * @param convertor  convertor to reposition
 * @param position   [IN/OUT] requested position; clamped to local_size (and
 *                   written back) when it lies at or beyond the end of the
 *                   data. It may also be updated by
 *                   opal_convertor_set_position_nocheck.
 * @return OPAL_SUCCESS on the inline paths, otherwise the value returned by
 *         opal_convertor_set_position_nocheck.
 */
static inline int32_t
opal_convertor_set_position( opal_convertor_t* convertor,
                             size_t* position )
{
    const size_t requested = *position;
    const size_t data_size = convertor->local_size;

    /*
     * Never let the convertor leave the data boundaries. This single test
     * also catches datatypes of size zero and convertors with a count of
     * zero. The convertor is then complete and sits at the end of the data.
     */
    if( OPAL_UNLIKELY(data_size <= requested) ) {
        convertor->flags |= CONVERTOR_COMPLETED;
        convertor->bConverted = data_size;
        *position = data_size;
        return OPAL_SUCCESS;
    }

    /* Nothing to do when the convertor already is at the requested position. */
    if( OPAL_LIKELY(requested == convertor->bConverted) ) {
        return OPAL_SUCCESS;
    }

    /* The position is inside the data: the convertor is no longer complete. */
    convertor->flags &= ~CONVERTOR_COMPLETED;

    /*
     * Shortcut: no checksum requested, data without gaps, and either a
     * send-side or a homogeneous convertor. Recording the new offset is
     * all that is needed in that case.
     */
    if( (0 == (convertor->flags & CONVERTOR_WITH_CHECKSUM)) &&
        (0 != (convertor->flags & OPAL_DATATYPE_FLAG_NO_GAPS)) &&
        (0 != (convertor->flags & (CONVERTOR_SEND | CONVERTOR_HOMOGENEOUS))) ) {
        convertor->bConverted = requested;
        return OPAL_SUCCESS;
    }

    return opal_convertor_set_position_nocheck( convertor, position );
}
/**
 * Add flags to a convertor and optionally move it to a new position.
 *
 * @param convertor  convertor to update
 * @param flags      flags OR-ed into the convertor flags
 * @param position   requested position, or NULL to leave the position
 *                   untouched
 * @return OPAL_SUCCESS when position is NULL, otherwise the value returned
 *         by opal_convertor_set_position.
 */
static inline int32_t
opal_convertor_personalize( opal_convertor_t* convertor,
                            uint32_t flags,
                            size_t* position )
{
    convertor->flags |= flags;

    /* Without a position there is nothing left to do. */
    if( OPAL_UNLIKELY(NULL == position) ) {
        return OPAL_SUCCESS;
    }

    return opal_convertor_set_position( convertor, position );
}
/**
 * Clone the `source` convertor into `destination`.
 *
 * @param source       convertor to copy from
 * @param destination  convertor to copy into
 * @param copy_stack   NOTE(review): presumably selects whether the conversion
 *                     stack is duplicated as well -- confirm in the
 *                     implementation
 * @return an int status; opal_convertor_clone_with_position deliberately
 *         discards it.
 */
OPAL_DECLSPEC int
opal_convertor_clone( const opal_convertor_t* source,
opal_convertor_t* destination,
int32_t copy_stack );
/**
 * Clone `source` into `destination`, then move the clone to *position.
 *
 * @param source       convertor to copy from
 * @param destination  convertor to copy into and to reposition
 * @param copy_stack   forwarded unchanged to opal_convertor_clone
 * @param position     [IN/OUT] requested position, handled as described for
 *                     opal_convertor_set_position
 * @return the value returned by opal_convertor_set_position; the status of
 *         the clone operation itself is intentionally ignored.
 */
static inline int
opal_convertor_clone_with_position( const opal_convertor_t* source,
                                    opal_convertor_t* destination,
                                    int32_t copy_stack,
                                    size_t* position )
{
    int status;

    (void)opal_convertor_clone( source, destination, copy_stack );
    status = opal_convertor_set_position( destination, position );

    return status;
}
/*
 * Dump helpers.
 * NOTE(review): presumably debugging aids printing the convertor and its
 * conversion stack; the output destination and format are defined by the
 * implementation and are not visible in this header.
 */
OPAL_DECLSPEC void
opal_convertor_dump( opal_convertor_t* convertor );
/* Takes a stack, the current stack position, the element description
 * array and a name. */
OPAL_DECLSPEC void
opal_datatype_dump_stack( const dt_stack_t* pStack,
int stack_pos,
const union dt_elem_desc* pDesc,
const char* name );
/*
 * Positioning routine taking the same arguments as
 * opal_convertor_set_position_nocheck.
 * NOTE(review): presumably the generic implementation used to move a
 * convertor to *position -- confirm in the implementation; nothing in this
 * header calls it.
 */
OPAL_DECLSPEC int
opal_convertor_generic_simple_position( opal_convertor_t* pConvertor,
size_t* position );
END_C_DECLS
#endif /* OPAL_CONVERTOR_H_HAS_BEEN_INCLUDED */
|