/*
* Copyright (c) 2023 NVIDIA Corporation.
* All rights reserved.
*
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "oshmem_config.h"
#include "oshmem/constants.h"
#include "oshmem/include/shmem.h"
#include "oshmem/runtime/params.h"
#include "oshmem/runtime/runtime.h"
#include <stdlib.h>
#include <memory.h>
#include "oshmem/shmem/shmem_api_logger.h"
#include "oshmem/shmem/shmem_lock.h"
#include "oshmem/mca/memheap/memheap.h"
#include "oshmem/mca/memheap/base/base.h"
#include "oshmem/mca/atomic/atomic.h"
#define OPAL_BITWISE_SIZEOF_LONG (SIZEOF_LONG * 8)
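
/*
 * Usage sketch (illustrative only; the dispatch path is an assumption,
 * not shown in this file): as their names suggest, these internal MCS
 * routines implement the standard OpenSHMEM lock calls, e.g.
 *
 *     static long lock = 0;        // symmetric lock word
 *     shmem_set_lock(&lock);       // blocks until the lock is acquired
 *     ... critical section ...
 *     shmem_clear_lock(&lock);     // hands off to a waiter or frees it
 */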
/** Use the basic MCS distributed queue lock algorithm */
struct shmem_mcs_lock {
    /** Has meaning only on the MCS-queue tail-owner PE */
    int tail;
    /**
     * Has meaning on all PEs.
     * The next field combines the successor's PE ID with a
     * wait-signal bit.
     */
    int next;
};
typedef struct shmem_mcs_lock shmem_mcs_lock_t;
#define SHMEM_MCSL_TAIL_OWNER(lock_ptr) \
    (((uintptr_t)(lock_ptr) / sizeof(long)) % shmem_n_pes())

#define SHMEM_MCSL_NEXT_MASK   0x7FFFFFFFU
#define SHMEM_MCSL_SIGNAL_MASK 0x80000000U /** Wait-signal bit mask */
#define SHMEM_MCSL_NEXT(lock_val)       ((lock_val) & SHMEM_MCSL_NEXT_MASK)
/** Identical to SHMEM_MCSL_NEXT(); the separate name improves readability
 *  when applied to tail values */
#define SHMEM_MCSL_GET_PE(tail_val)     ((tail_val) & SHMEM_MCSL_NEXT_MASK)
#define SHMEM_MCSL_SIGNAL(lock_val)     ((lock_val) & SHMEM_MCSL_SIGNAL_MASK)
#define SHMEM_MCSL_SET_SIGNAL(lock_val) ((lock_val) | SHMEM_MCSL_SIGNAL_MASK)
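
/*
 * Worked example of the encoding (illustrative values, not taken from a
 * real run): SHMEM_MCSL_SET_SIGNAL(5) for a waiting PE 5 yields
 * 0x80000005; SHMEM_MCSL_GET_PE(0x80000005) recovers 5, and
 * SHMEM_MCSL_SIGNAL(0x80000005) is nonzero. A next field equal to
 * SHMEM_MCSL_NEXT_MASK (0x7FFFFFFF) means "no successor yet".
 */
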
void
_shmem_mcs_set_lock(long *lockp)
{
    shmem_mcs_lock_t *lock = (shmem_mcs_lock_t *) lockp;
    int mcs_tail_owner = SHMEM_MCSL_TAIL_OWNER(lock);
    int new_tail_req = 0;
    int *tail = &(lock->tail);
    int *next = &(lock->next);
    int my_pe = shmem_my_pe();
    int curr = 0;
    int out_value = 0;
    int prev_tail = 0;
    int prev_tailpe = 0;
    int tval = 0;
    int tmp_val = 0;
    int retv = 0;
    uint64_t value_tmp = 0;

    RUNTIME_CHECK_INIT();
    /**
     * Initialize this PE's next field to the next mask.
     * Done atomically to avoid races, since the next field can be
     * modified by other PEs while they acquire or release the lock.
     *
     * This could be a shmem_atomic_set to be safe on non
     * cache-coherent architectures, at some cost in performance.
     */
    value_tmp = SHMEM_MCSL_NEXT_MASK;
    out_value = SHMEM_MCSL_NEXT_MASK;
    retv = MCA_ATOMIC_CALL(swap(oshmem_ctx_default, (void *)next,
                                (void *)&out_value, value_tmp,
                                sizeof(int), my_pe));
    RUNTIME_CHECK_RC(retv);
    MCA_SPML_CALL(quiet(oshmem_ctx_default));
    /** Request the lock: this PE's ID with the wait-signal bit set */
    new_tail_req = SHMEM_MCSL_SET_SIGNAL(my_pe);

    /**
     * Swap this PE in as the new tail on the tail-owner PE and
     * fetch the previous tail value.
     */
    retv = MCA_ATOMIC_CALL(swap(oshmem_ctx_default, (void *)tail,
                                (void *)&prev_tail,
                                OSHMEM_ATOMIC_PTR_2_INT(&new_tail_req,
                                                        sizeof(new_tail_req)),
                                sizeof(int), mcs_tail_owner));
    RUNTIME_CHECK_RC(retv);
    prev_tailpe = SHMEM_MCSL_GET_PE(prev_tail);
    if (SHMEM_MCSL_SIGNAL(prev_tail)) {
        /**
         * Another PE acquired the lock before this one.
         * Link this PE into the previous tail PE's next field.
         * Adding (my_pe - SHMEM_MCSL_NEXT_MASK) replaces the next
         * mask with my_pe without changing the signal bit.
         */
        tmp_val = my_pe - SHMEM_MCSL_NEXT_MASK;
        retv = MCA_ATOMIC_CALL(add(oshmem_ctx_default, (void *)next, tmp_val,
                                   sizeof(int), prev_tailpe));
        RUNTIME_CHECK_RC(retv);

        /**
         * Set this PE's own wait-signal bit; the predecessor will
         * eventually clear it when it releases the lock.
         * Must be done atomically to avoid races where the next
         * field is modified by another PE acquiring or releasing
         * the lock.
         */
        retv = MCA_ATOMIC_CALL(add(oshmem_ctx_default, (void *)next,
                                   SHMEM_MCSL_SIGNAL_MASK, sizeof(int),
                                   my_pe));
        RUNTIME_CHECK_RC(retv);
        MCA_SPML_CALL(quiet(oshmem_ctx_default));
        /** Wait until the predecessor releases the lock to this PE,
         *  i.e. until our wait-signal bit is cleared. */
        retv = MCA_ATOMIC_CALL(fadd(oshmem_ctx_default, (void *)next,
                                    (void *)&curr, tval, sizeof(int), my_pe));
        RUNTIME_CHECK_RC(retv);
        while (SHMEM_MCSL_SIGNAL(curr)) {
            retv = MCA_SPML_CALL(wait((void *)next, SHMEM_CMP_NE,
                                      (void *)&curr, SHMEM_INT));
            RUNTIME_CHECK_RC(retv);
            retv = MCA_ATOMIC_CALL(fadd(oshmem_ctx_default, (void *)next,
                                        (void *)&curr, tval, sizeof(int),
                                        my_pe));
            RUNTIME_CHECK_RC(retv);
        }
    }
    /** else: this PE now holds the lock, since no one else held it */
}
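
/*
 * Linking arithmetic sketch for the set path (illustrative values;
 * assumes 32-bit wraparound on the int next field): a freshly
 * initialized next field holds SHMEM_MCSL_NEXT_MASK (0x7FFFFFFF).
 * A successor PE 7 adds (7 - 0x7FFFFFFF), so 0x7FFFFFFF becomes
 * 0x00000007, and 0xFFFFFFFF (mask plus signal bit) becomes
 * 0x80000007: the successor's PE bits are installed either way and
 * the signal bit is left untouched.
 */
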
void
_shmem_mcs_clear_lock(long *lockp)
{
    shmem_mcs_lock_t *lock = (shmem_mcs_lock_t *) lockp;
    int mcs_tail_owner = SHMEM_MCSL_TAIL_OWNER(lock);
    int *tail = &(lock->tail);
    int *next = &(lock->next);
    int my_pe = shmem_my_pe();
    int next_value = 0;
    int swap_cond = 0;
    int prev_value = 0;
    int tval = 0;
    int val_tmp = 0;
    int nmask = 0;
    int a_val = 0;
    int retv = 0;

    /**
     * Fetch this PE's next field.
     * This could be a shmem_atomic_fetch to be safe on non
     * cache-coherent architectures, at some cost in performance.
     */
    retv = MCA_ATOMIC_CALL(fadd(oshmem_ctx_default, (void *)next,
                                (void *)&next_value, tval, sizeof(int),
                                my_pe));
    RUNTIME_CHECK_RC(retv);
    MCA_SPML_CALL(quiet(oshmem_ctx_default));
    if (next_value == SHMEM_MCSL_NEXT_MASK) {
        /** No successor is linked in yet: try to swing the tail back
         *  to the free state. */
        swap_cond = SHMEM_MCSL_SET_SIGNAL(my_pe);
        retv = MCA_ATOMIC_CALL(cswap(oshmem_ctx_default,
                                     (void *)tail, (uint64_t *)&(prev_value),
                                     OSHMEM_ATOMIC_PTR_2_INT(&swap_cond,
                                                             sizeof(swap_cond)),
                                     OSHMEM_ATOMIC_PTR_2_INT(&val_tmp,
                                                             sizeof(val_tmp)),
                                     sizeof(int), mcs_tail_owner));
        RUNTIME_CHECK_RC(retv);
        /** This PE was still the tail, so the lock is now free */
        if (prev_value == swap_cond) {
            return;
        }

        /**
         * This PE is not the tail: another PE may be racing to acquire
         * the lock; wait for it to finish linking itself in as our
         * successor.
         */
        nmask = SHMEM_MCSL_NEXT_MASK;
        while (next_value == nmask) {
            retv = MCA_SPML_CALL(wait((void *)next, SHMEM_CMP_NE,
                                      (void *)&nmask, SHMEM_INT));
            RUNTIME_CHECK_RC(retv);
            retv = MCA_ATOMIC_CALL(fadd(oshmem_ctx_default, (void *)next,
                                        (void *)&next_value, tval,
                                        sizeof(int), my_pe));
            RUNTIME_CHECK_RC(retv);
        }
    }
    /** There is a successor: release the lock to it by clearing its
     *  wait-signal bit. */
    a_val = SHMEM_MCSL_SIGNAL_MASK;
    retv = MCA_ATOMIC_CALL(add(oshmem_ctx_default,
                               (void *)next, a_val, sizeof(a_val),
                               SHMEM_MCSL_NEXT(next_value)));
    RUNTIME_CHECK_RC(retv);
    MCA_SPML_CALL(quiet(oshmem_ctx_default));
}
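
/*
 * Hand-off arithmetic sketch for the clear path (illustrative values;
 * assumes 32-bit wraparound): a waiter's next field is 0xFFFFFFFF
 * (next mask plus its own signal bit), or e.g. 0x80000007 once
 * successor PE 7 has linked in. Adding SHMEM_MCSL_SIGNAL_MASK wraps
 * the high bit away: 0xFFFFFFFF becomes 0x7FFFFFFF (signal cleared,
 * no successor) and 0x80000007 becomes 0x00000007 (signal cleared,
 * successor kept), which is exactly what the waiter's fadd/wait loop
 * in _shmem_mcs_set_lock() checks for.
 */
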
int
_shmem_mcs_test_lock(long *lockp)
{
    shmem_mcs_lock_t *lock = (shmem_mcs_lock_t *) lockp;
    int mcs_tail_owner = SHMEM_MCSL_TAIL_OWNER(lock);
    int new_tail_req = 0;
    int prev_tail = 0;
    int tmp_cond = 0;
    int *tail = &(lock->tail);
    int *next = &(lock->next);
    int my_pe = shmem_my_pe();
    int retv = 0;

    /** Initialize this PE's next field to the next mask */
    *next = SHMEM_MCSL_NEXT_MASK;
    /** Request the lock: this PE's ID with the wait-signal bit set */
    new_tail_req = SHMEM_MCSL_SET_SIGNAL(my_pe);
    /** Take the lock only if the tail is currently clear (lock free) */
    retv = MCA_ATOMIC_CALL(cswap(oshmem_ctx_default,
                                 (void *)tail, (uint64_t *)&(prev_tail),
                                 OSHMEM_ATOMIC_PTR_2_INT(&tmp_cond,
                                                         sizeof(tmp_cond)),
                                 OSHMEM_ATOMIC_PTR_2_INT(&new_tail_req,
                                                         sizeof(new_tail_req)),
                                 sizeof(int), mcs_tail_owner));
    RUNTIME_CHECK_RC(retv);
    /** Nonzero return means the lock was already held by someone */
    return (0 != prev_tail);
}
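
/*
 * Usage sketch for the test variant (illustrative; follows standard
 * OpenSHMEM shmem_test_lock() semantics, where nonzero means busy):
 *
 *     static long lock = 0;
 *     while (shmem_test_lock(&lock)) {
 *         ... do other useful work instead of blocking ...
 *     }
 *     ... critical section ...
 *     shmem_clear_lock(&lock);
 */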