1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260
|
/* oshmem/include/shmemx.h. This file contains vendor extension functions */
/*
* Copyright (c) 2014-2015 Mellanox Technologies, Inc.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#ifndef OSHMEM_SHMEMX_H
#define OSHMEM_SHMEMX_H
#include <shmem.h>
#if defined(c_plusplus) || defined(__cplusplus)
extern "C" {
#endif
enum {
SHMEM_HINT_NONE = 0,
SHMEM_HINT_LOW_LAT_MEM = 1 << 0,
SHMEM_HINT_HIGH_BW_MEM = 1 << 1,
SHMEM_HINT_NEAR_NIC_MEM = 1 << 2,
SHMEM_HINT_DEVICE_GPU_MEM = 1 << 3,
SHMEM_HINT_DEVICE_NIC_MEM = 1 << 4,
SHMEM_HINT_PSYNC = 1 << 16,
SHMEM_HINT_PWORK = 1 << 17,
SHMEM_HINT_ATOMICS = 1 << 18
};
/*
* All OpenSHMEM extension APIs that are not part of this specification must be defined in the shmemx.h include
* file. These extensions shall use the shmemx_ prefix for all routine, variable, and constant names.
*/
/*
* Elemental put routines
*/
OSHMEM_DECLSPEC void shmemx_int16_p(int16_t* addr, int16_t value, int pe);
OSHMEM_DECLSPEC void shmemx_int32_p(int32_t* addr, int32_t value, int pe);
OSHMEM_DECLSPEC void shmemx_int64_p(int64_t* addr, int64_t value, int pe);
/*
* Elemental put routines
*/
/*
* Block data put routines
*/
/*
* Strided put routines
*/
/*
* Elemental get routines
*/
OSHMEM_DECLSPEC int16_t shmemx_int16_g(const int16_t* addr, int pe);
OSHMEM_DECLSPEC int32_t shmemx_int32_g(const int32_t* addr, int pe);
OSHMEM_DECLSPEC int64_t shmemx_int64_g(const int64_t* addr, int pe);
/*
* Block data get routines
*/
/*
* Strided get routines
*/
/*
* Atomic operations
*/
/* Atomic swap */
OSHMEM_DECLSPEC int32_t shmemx_int32_swap(int32_t *target, int32_t value, int pe);
OSHMEM_DECLSPEC int64_t shmemx_int64_swap(int64_t *target, int64_t value, int pe);
/* Atomic set */
OSHMEM_DECLSPEC void shmemx_int32_set(int32_t *target, int32_t value, int pe);
OSHMEM_DECLSPEC void shmemx_int64_set(int64_t *target, int64_t value, int pe);
/* Atomic conditional swap */
OSHMEM_DECLSPEC int32_t shmemx_int32_cswap(int32_t *target, int32_t cond, int32_t value, int pe);
OSHMEM_DECLSPEC int64_t shmemx_int64_cswap(int64_t *target, int64_t cond, int64_t value, int pe);
/* Atomic Fetch&Add */
OSHMEM_DECLSPEC int32_t shmemx_int32_fadd(int32_t *target, int32_t value, int pe);
OSHMEM_DECLSPEC int64_t shmemx_int64_fadd(int64_t *target, int64_t value, int pe);
/* Atomic Fetch&And */
OSHMEM_DECLSPEC int32_t shmemx_int32_atomic_fetch_and(int32_t *target, int32_t value, int pe);
OSHMEM_DECLSPEC int64_t shmemx_int64_atomic_fetch_and(int64_t *target, int64_t value, int pe);
OSHMEM_DECLSPEC uint32_t shmemx_uint32_atomic_fetch_and(uint32_t *target, uint32_t value, int pe);
OSHMEM_DECLSPEC uint64_t shmemx_uint64_atomic_fetch_and(uint64_t *target, uint64_t value, int pe);
/* Atomic Fetch&Or */
OSHMEM_DECLSPEC int32_t shmemx_int32_atomic_fetch_or(int32_t *target, int32_t value, int pe);
OSHMEM_DECLSPEC int64_t shmemx_int64_atomic_fetch_or(int64_t *target, int64_t value, int pe);
OSHMEM_DECLSPEC uint32_t shmemx_uint32_atomic_fetch_or(uint32_t *target, uint32_t value, int pe);
OSHMEM_DECLSPEC uint64_t shmemx_uint64_atomic_fetch_or(uint64_t *target, uint64_t value, int pe);
/* Atomic Fetch&Xor */
OSHMEM_DECLSPEC int32_t shmemx_int32_atomic_fetch_xor(int32_t *target, int32_t value, int pe);
OSHMEM_DECLSPEC int64_t shmemx_int64_atomic_fetch_xor(int64_t *target, int64_t value, int pe);
OSHMEM_DECLSPEC uint32_t shmemx_uint32_atomic_fetch_xor(uint32_t *target, uint32_t value, int pe);
OSHMEM_DECLSPEC uint64_t shmemx_uint64_atomic_fetch_xor(uint64_t *target, uint64_t value, int pe);
/* Atomic Fetch */
OSHMEM_DECLSPEC int32_t shmemx_int32_fetch(const int32_t *target, int pe);
OSHMEM_DECLSPEC int64_t shmemx_int64_fetch(const int64_t *target, int pe);
/* Atomic Fetch&Inc */
OSHMEM_DECLSPEC int32_t shmemx_int32_finc(int32_t *target, int pe);
OSHMEM_DECLSPEC int64_t shmemx_int64_finc(int64_t *target, int pe);
/* Atomic Add */
OSHMEM_DECLSPEC void shmemx_int32_add(int32_t *target, int32_t value, int pe);
OSHMEM_DECLSPEC void shmemx_int64_add(int64_t *target, int64_t value, int pe);
/* Atomic And */
OSHMEM_DECLSPEC void shmemx_int32_atomic_and(int32_t *target, int32_t value, int pe);
OSHMEM_DECLSPEC void shmemx_int64_atomic_and(int64_t *target, int64_t value, int pe);
OSHMEM_DECLSPEC void shmemx_uint32_atomic_and(uint32_t *target, uint32_t value, int pe);
OSHMEM_DECLSPEC void shmemx_uint64_atomic_and(uint64_t *target, uint64_t value, int pe);
/* Atomic Or */
OSHMEM_DECLSPEC void shmemx_int32_atomic_or(int32_t *target, int32_t value, int pe);
OSHMEM_DECLSPEC void shmemx_int64_atomic_or(int64_t *target, int64_t value, int pe);
OSHMEM_DECLSPEC void shmemx_uint32_atomic_or(uint32_t *target, uint32_t value, int pe);
OSHMEM_DECLSPEC void shmemx_uint64_atomic_or(uint64_t *target, uint64_t value, int pe);
/* Atomic Xor */
OSHMEM_DECLSPEC void shmemx_int32_atomic_xor(int32_t *target, int32_t value, int pe);
OSHMEM_DECLSPEC void shmemx_int64_atomic_xor(int64_t *target, int64_t value, int pe);
OSHMEM_DECLSPEC void shmemx_uint32_atomic_xor(uint32_t *target, uint32_t value, int pe);
OSHMEM_DECLSPEC void shmemx_uint64_atomic_xor(uint64_t *target, uint64_t value, int pe);
/* Atomic Inc */
OSHMEM_DECLSPEC void shmemx_int32_inc(int32_t *target, int pe);
OSHMEM_DECLSPEC void shmemx_int64_inc(int64_t *target, int pe);
/*
* P2P sync routines
*/
OSHMEM_DECLSPEC void shmemx_int32_wait(int32_t *addr, int32_t value);
OSHMEM_DECLSPEC void shmemx_int64_wait(int64_t *addr, int64_t value);
OSHMEM_DECLSPEC void shmemx_int32_wait_until(int32_t *addr, int cmp, int32_t value);
OSHMEM_DECLSPEC void shmemx_int64_wait_until(int64_t *addr, int cmp, int64_t value);
/*
* Reduction routines
*/
OSHMEM_DECLSPEC void shmemx_int16_and_to_all(int16_t *target, const int16_t *source, int nreduce, int PE_start, int logPE_stride, int PE_size, int16_t *pWrk, long *pSync);
OSHMEM_DECLSPEC void shmemx_int32_and_to_all(int32_t *target, const int32_t *source, int nreduce, int PE_start, int logPE_stride, int PE_size, int32_t *pWrk, long *pSync);
OSHMEM_DECLSPEC void shmemx_int64_and_to_all(int64_t *target, const int64_t *source, int nreduce, int PE_start, int logPE_stride, int PE_size, int64_t *pWrk, long *pSync);
OSHMEM_DECLSPEC void shmemx_int16_or_to_all(int16_t *target, const int16_t *source, int nreduce, int PE_start, int logPE_stride, int PE_size, int16_t *pWrk, long *pSync);
OSHMEM_DECLSPEC void shmemx_int32_or_to_all(int32_t *target, const int32_t *source, int nreduce, int PE_start, int logPE_stride, int PE_size, int32_t *pWrk, long *pSync);
OSHMEM_DECLSPEC void shmemx_int64_or_to_all(int64_t *target, const int64_t *source, int nreduce, int PE_start, int logPE_stride, int PE_size, int64_t *pWrk, long *pSync);
OSHMEM_DECLSPEC void shmemx_int16_xor_to_all(int16_t *target, const int16_t *source, int nreduce, int PE_start, int logPE_stride, int PE_size, int16_t *pWrk, long *pSync);
OSHMEM_DECLSPEC void shmemx_int32_xor_to_all(int32_t *target, const int32_t *source, int nreduce, int PE_start, int logPE_stride, int PE_size, int32_t *pWrk, long *pSync);
OSHMEM_DECLSPEC void shmemx_int64_xor_to_all(int64_t *target, const int64_t *source, int nreduce, int PE_start, int logPE_stride, int PE_size, int64_t *pWrk, long *pSync);
OSHMEM_DECLSPEC void shmemx_int16_max_to_all(int16_t *target, const int16_t *source, int nreduce, int PE_start, int logPE_stride, int PE_size, int16_t *pWrk, long *pSync);
OSHMEM_DECLSPEC void shmemx_int32_max_to_all(int32_t *target, const int32_t *source, int nreduce, int PE_start, int logPE_stride, int PE_size, int32_t *pWrk, long *pSync);
OSHMEM_DECLSPEC void shmemx_int64_max_to_all(int64_t *target, const int64_t *source, int nreduce, int PE_start, int logPE_stride, int PE_size, int64_t *pWrk, long *pSync);
OSHMEM_DECLSPEC void shmemx_int16_min_to_all(int16_t *target, const int16_t *source, int nreduce, int PE_start, int logPE_stride, int PE_size, int16_t *pWrk, long *pSync);
OSHMEM_DECLSPEC void shmemx_int32_min_to_all(int32_t *target, const int32_t *source, int nreduce, int PE_start, int logPE_stride, int PE_size, int32_t *pWrk, long *pSync);
OSHMEM_DECLSPEC void shmemx_int64_min_to_all(int64_t *target, const int64_t *source, int nreduce, int PE_start, int logPE_stride, int PE_size, int64_t *pWrk, long *pSync);
OSHMEM_DECLSPEC void shmemx_int16_sum_to_all(int16_t *target, const int16_t *source, int nreduce, int PE_start, int logPE_stride, int PE_size, int16_t *pWrk, long *pSync);
OSHMEM_DECLSPEC void shmemx_int32_sum_to_all(int32_t *target, const int32_t *source, int nreduce, int PE_start, int logPE_stride, int PE_size, int32_t *pWrk, long *pSync);
OSHMEM_DECLSPEC void shmemx_int64_sum_to_all(int64_t *target, const int64_t *source, int nreduce, int PE_start, int logPE_stride, int PE_size, int64_t *pWrk, long *pSync);
OSHMEM_DECLSPEC void shmemx_int16_prod_to_all(int16_t *target, const int16_t *source, int nreduce, int PE_start, int logPE_stride, int PE_size, int16_t *pWrk, long *pSync);
OSHMEM_DECLSPEC void shmemx_int32_prod_to_all(int32_t *target, const int32_t *source, int nreduce, int PE_start, int logPE_stride, int PE_size, int32_t *pWrk, long *pSync);
OSHMEM_DECLSPEC void shmemx_int64_prod_to_all(int64_t *target, const int64_t *source, int nreduce, int PE_start, int logPE_stride, int PE_size, int64_t *pWrk, long *pSync);
/* shmemx_alltoall_global_nb is a nonblocking collective routine, where each PE
* exchanges “size” bytes of data with all other PEs in the OpenSHMEM job.
* @param dest A symmetric data object that is large enough to receive
* “size” bytes of data from each PE in the OpenSHMEM job.
* @param source A symmetric data object that contains “size” bytes of data
* for each PE in the OpenSHMEM job.
* @param size The number of bytes to be sent to each PE in the job.
* @param counter A symmetric data object to be atomically incremented after
* the target buffer is updated.
*
* @return OSHMEM_SUCCESS or failure status.
*/
OSHMEM_DECLSPEC void shmemx_alltoall_global_nb(void *dest, const void *source, size_t size, long *counter);
/*
* Backward compatibility section
*/
#define shmem_int32_swap shmemx_int32_swap
#define shmem_int64_swap shmemx_int64_swap
#define shmem_int32_set shmemx_int32_set
#define shmem_int64_set shmemx_int64_set
#define shmem_int32_cswap shmemx_int32_cswap
#define shmem_int64_cswap shmemx_int64_cswap
#define shmem_int32_fadd shmemx_int32_fadd
#define shmem_int64_fadd shmemx_int64_fadd
#define shmem_int32_fetch shmemx_int32_fetch
#define shmem_int64_fetch shmemx_int64_fetch
#define shmem_int32_finc shmemx_int32_finc
#define shmem_int64_finc shmemx_int64_finc
#define shmem_int32_add shmemx_int32_add
#define shmem_int64_add shmemx_int64_add
#define shmem_int32_inc shmemx_int32_inc
#define shmem_int64_inc shmemx_int64_inc
#define shmem_int32_wait shmemx_int32_wait
#define shmem_int64_wait shmemx_int64_wait
#define shmem_int16_and_to_all shmemx_int16_and_to_all
#define shmem_int32_and_to_all shmemx_int32_and_to_all
#define shmem_int64_and_to_all shmemx_int64_and_to_all
#define shmem_int16_or_to_all shmemx_int16_or_to_all
#define shmem_int32_or_to_all shmemx_int32_or_to_all
#define shmem_int64_or_to_all shmemx_int64_or_to_all
#define shmem_int16_xor_to_all shmemx_int16_xor_to_all
#define shmem_int32_xor_to_all shmemx_int32_xor_to_all
#define shmem_int64_xor_to_all shmemx_int64_xor_to_all
#define shmem_int16_max_to_all shmemx_int16_max_to_all
#define shmem_int32_max_to_all shmemx_int32_max_to_all
#define shmem_int64_max_to_all shmemx_int64_max_to_all
#define shmem_int16_min_to_all shmemx_int16_min_to_all
#define shmem_int32_min_to_all shmemx_int32_min_to_all
#define shmem_int64_min_to_all shmemx_int64_min_to_all
#define shmem_int16_sum_to_all shmemx_int16_sum_to_all
#define shmem_int32_sum_to_all shmemx_int32_sum_to_all
#define shmem_int64_sum_to_all shmemx_int64_sum_to_all
#define shmem_int16_prod_to_all shmemx_int16_prod_to_all
#define shmem_int32_prod_to_all shmemx_int32_prod_to_all
#define shmem_int64_prod_to_all shmemx_int64_prod_to_all
#if defined(c_plusplus) || defined(__cplusplus)
}
#endif
#endif /* OSHMEM_SHMEMX_H */
|