1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145
|
// Start of atomics8.h
// Forward declarations of the 8-bit atomic operations defined below.
// Every operation comes in two variants: "_global" takes a pointer
// qualified with __global, "_shared" takes a pointer qualified with __local.

// Compare-and-exchange.
SCALAR_FUN_ATTR int8_t atomic_cmpxchg_i8_global(volatile __global int8_t *p,
int8_t cmp, int8_t val);
SCALAR_FUN_ATTR int8_t atomic_cmpxchg_i8_shared(volatile __local int8_t *p,
int8_t cmp, int8_t val);
// Arithmetic operations on signed bytes (generated by DEFINE_8BIT_ATOMIC).
SCALAR_FUN_ATTR int8_t atomic_add_i8_global(volatile __global int8_t *p, int8_t x);
SCALAR_FUN_ATTR int8_t atomic_add_i8_shared(volatile __local int8_t *p, int8_t x);
SCALAR_FUN_ATTR int8_t atomic_smax_i8_global(volatile __global int8_t *p, int8_t x);
SCALAR_FUN_ATTR int8_t atomic_smax_i8_shared(volatile __local int8_t *p, int8_t x);
SCALAR_FUN_ATTR int8_t atomic_smin_i8_global(volatile __global int8_t *p, int8_t x);
SCALAR_FUN_ATTR int8_t atomic_smin_i8_shared(volatile __local int8_t *p, int8_t x);
// Unsigned max/min: note that these take and return uint8_t.
SCALAR_FUN_ATTR uint8_t atomic_umax_i8_global(volatile __global uint8_t *p, uint8_t x);
SCALAR_FUN_ATTR uint8_t atomic_umax_i8_shared(volatile __local uint8_t *p, uint8_t x);
SCALAR_FUN_ATTR uint8_t atomic_umin_i8_global(volatile __global uint8_t *p, uint8_t x);
SCALAR_FUN_ATTR uint8_t atomic_umin_i8_shared(volatile __local uint8_t *p, uint8_t x);
// Bitwise operations, implemented directly on top of the 32-bit atomics.
SCALAR_FUN_ATTR int8_t atomic_and_i8_global(volatile __global int8_t *p, int8_t x);
SCALAR_FUN_ATTR int8_t atomic_and_i8_shared(volatile __local int8_t *p, int8_t x);
SCALAR_FUN_ATTR int8_t atomic_or_i8_global(volatile __global int8_t *p, int8_t x);
SCALAR_FUN_ATTR int8_t atomic_or_i8_shared(volatile __local int8_t *p, int8_t x);
SCALAR_FUN_ATTR int8_t atomic_xor_i8_global(volatile __global int8_t *p, int8_t x);
SCALAR_FUN_ATTR int8_t atomic_xor_i8_shared(volatile __local int8_t *p, int8_t x);
// 8-bit compare-and-exchange on a __global byte, emulated with a 32-bit
// compare-and-exchange on the aligned word that contains the byte.
//
// If *p equals cmp, val is stored to *p.  In either case the byte that was
// observed in *p is returned.  The other three bytes of the word are never
// modified.
//
// The byte at address offset k within the word is taken to occupy bits
// 8k..8k+7 of the 32-bit value (little-endian lane layout).
SCALAR_FUN_ATTR int8_t atomic_cmpxchg_i8_global(volatile __global int8_t *p,
                                                int8_t cmp, int8_t val) {
  int offset = ((uintptr_t)p & 3);
  volatile __global int32_t *p32 = (volatile __global int32_t*)((uintptr_t)p & ~0x3);
  int shift = offset * 8;
  // All lane arithmetic is unsigned and goes through uint8_t so that a
  // negative cmp/val cannot sign-extend into the neighbouring bytes.
  uint32_t mask = (uint32_t)0xff << shift;
  uint32_t shifted_val = (uint32_t)(uint8_t)val << shift;
  uint32_t shifted_cmp = (uint32_t)(uint8_t)cmp << shift;
  // First guess: our byte holds cmp and the neighbouring bytes are zero.
  // This avoids a separate non-atomic read of the word.
  uint32_t old = shifted_cmp;
  uint32_t got;
  while ((got = atomic_cmpxchg_i32_global(p32, old, (old & ~mask) | shifted_val)) != old) {
    old = got;
    if ((got & mask) != shifted_cmp) {
      // Our byte does not hold cmp: the exchange fails and nothing is stored.
      break;
    }
    // Our byte matches but a neighbour differed from the guess; retry with
    // the word that was actually observed.
  }
  return (int8_t)(old >> shift);
}
// 8-bit compare-and-exchange on a __local byte, emulated with a 32-bit
// compare-and-exchange on the aligned word that contains the byte.
//
// If *p equals cmp, val is stored to *p.  In either case the byte that was
// observed in *p is returned.  The other three bytes of the word are never
// modified.
//
// The byte at address offset k within the word is taken to occupy bits
// 8k..8k+7 of the 32-bit value (little-endian lane layout).
SCALAR_FUN_ATTR int8_t atomic_cmpxchg_i8_shared(volatile __local int8_t *p,
                                                int8_t cmp, int8_t val) {
  // Byte index within the aligned word is the two low address bits.
  int offset = ((uintptr_t)p & 3);
  volatile __local int32_t *p32 = (volatile __local int32_t*)((uintptr_t)p & ~0x3);
  int shift = offset * 8;
  // All lane arithmetic is unsigned and goes through uint8_t so that a
  // negative cmp/val cannot sign-extend into the neighbouring bytes.
  uint32_t mask = (uint32_t)0xff << shift;
  uint32_t shifted_val = (uint32_t)(uint8_t)val << shift;
  uint32_t shifted_cmp = (uint32_t)(uint8_t)cmp << shift;
  // First guess: our byte holds cmp and the neighbouring bytes are zero.
  // This avoids a separate non-atomic read of the word.
  uint32_t old = shifted_cmp;
  uint32_t got;
  while ((got = atomic_cmpxchg_i32_shared(p32, old, (old & ~mask) | shifted_val)) != old) {
    old = got;
    if ((got & mask) != shifted_cmp) {
      // Our byte does not hold cmp: the exchange fails and nothing is stored.
      break;
    }
    // Our byte matches but a neighbour differed from the guess; retry with
    // the word that was actually observed.
  }
  return (int8_t)(old >> shift);
}
// Convenience macro for arithmetic.
//
// DEFINE_8BIT_ATOMIC(name, T, op) defines atomic_<name>_i8_global (pointer
// qualified __global) and atomic_<name>_i8_shared (pointer qualified
// __local).  Each atomically replaces *p with op(*p, val) and returns the
// byte that was in *p before the update.
//
// The update is a compare-and-exchange loop on the aligned 32-bit word
// containing *p.  The byte at address offset k is taken to occupy bits
// 8k..8k+7 of that word.  The result of op is narrowed to uint8_t before
// being shifted into place, so a negative result cannot sign-extend into
// (and overwrite) the neighbouring bytes of the word.
#define DEFINE_8BIT_ATOMIC(name, T, op)                                 \
  SCALAR_FUN_ATTR T                                                     \
  atomic_##name##_i8_global(volatile __global T *p, T val) {            \
    int offset = ((uintptr_t)p & 3);                                    \
    volatile __global int32_t *p32 =                                    \
      (volatile __global int32_t*)((uintptr_t)p & ~0x3);                \
    int shift = offset * 8;                                             \
    int32_t mask = (int32_t)((uint32_t)0xff << shift);                  \
    /* First guess: the whole word is zero. */                          \
    int32_t old = 0;                                                    \
    int32_t upd =                                                       \
      (int32_t)((uint32_t)(uint8_t)(op(old >> shift, val)) << shift);   \
    int32_t saw;                                                        \
    while ((saw=atomic_cmpxchg_i32_global(p32, old, upd)) != old) {     \
      old = saw;                                                        \
      /* Keep the neighbouring bytes, replace only our own lane. */     \
      upd = (old & ~mask) |                                             \
        (int32_t)((uint32_t)(uint8_t)(op(old >> shift, val)) << shift); \
    }                                                                   \
    return old >> shift;                                                \
  }                                                                     \
  SCALAR_FUN_ATTR T                                                     \
  atomic_##name##_i8_shared(volatile __local T *p, T val) {             \
    int offset = ((uintptr_t)p & 3);                                    \
    volatile __local int32_t *p32 =                                     \
      (volatile __local int32_t*)((uintptr_t)p & ~0x3);                 \
    int shift = offset * 8;                                             \
    int32_t mask = (int32_t)((uint32_t)0xff << shift);                  \
    /* First guess: the whole word is zero. */                          \
    int32_t old = 0;                                                    \
    int32_t upd =                                                       \
      (int32_t)((uint32_t)(uint8_t)(op(old >> shift, val)) << shift);   \
    int32_t saw;                                                        \
    while ((saw=atomic_cmpxchg_i32_shared(p32, old, upd)) != old) {     \
      old = saw;                                                        \
      /* Keep the neighbouring bytes, replace only our own lane. */     \
      upd = (old & ~mask) |                                             \
        (int32_t)((uint32_t)(uint8_t)(op(old >> shift, val)) << shift); \
    }                                                                   \
    return old >> shift;                                                \
  }
// Instantiate the __global and __local variants of each 8-bit
// read-modify-write atomic.  The third argument names the scalar helper that
// the macro applies as op(current byte, operand); those helpers are not
// defined in this part of the file.
DEFINE_8BIT_ATOMIC(add, int8_t, add8);
DEFINE_8BIT_ATOMIC(smax, int8_t, smax8);
DEFINE_8BIT_ATOMIC(smin, int8_t, smin8);
// The unsigned variants operate on uint8_t rather than int8_t.
DEFINE_8BIT_ATOMIC(umax, uint8_t, umax8);
DEFINE_8BIT_ATOMIC(umin, uint8_t, umin8);
// Atomically ANDs val into the __global byte *p and returns the byte's
// previous value.  Implemented as one 32-bit atomic AND on the aligned word
// containing *p; the byte at address offset k is taken to occupy bits
// 8k..8k+7 of that word.
SCALAR_FUN_ATTR int8_t atomic_and_i8_global(volatile __global int8_t *p, int8_t val) {
  volatile __global int32_t *p32 = (volatile __global int32_t*)((uintptr_t)p & ~0x3);
  int shift = ((uintptr_t)p & 3) * 8;
  // Built in unsigned arithmetic so that a negative val is never
  // left-shifted as a signed int.
  uint32_t lane = (uint32_t)0xff << shift;
  // All-ones outside our lane leaves the neighbouring bytes unchanged.
  uint32_t operand = ~lane | ((uint32_t)(uint8_t)val << shift);
  return atomic_and_i32_global(p32, (int32_t)operand) >> shift;
}
// Atomically ANDs val into the __local byte *p and returns the byte's
// previous value.  Implemented as one 32-bit atomic AND on the aligned word
// containing *p; the byte at address offset k is taken to occupy bits
// 8k..8k+7 of that word.
SCALAR_FUN_ATTR int8_t atomic_and_i8_shared(volatile __local int8_t *p, int8_t val) {
  volatile __local int32_t *p32 = (volatile __local int32_t*)((uintptr_t)p & ~0x3);
  int shift = ((uintptr_t)p & 3) * 8;
  // Built in unsigned arithmetic so that a negative val is never
  // left-shifted as a signed int.
  uint32_t lane = (uint32_t)0xff << shift;
  // All-ones outside our lane leaves the neighbouring bytes unchanged.
  uint32_t operand = ~lane | ((uint32_t)(uint8_t)val << shift);
  return atomic_and_i32_shared(p32, (int32_t)operand) >> shift;
}
// Atomically ORs val into the __global byte *p via a 32-bit atomic OR on the
// aligned word containing it, and returns the byte's previous value.
SCALAR_FUN_ATTR int8_t atomic_or_i8_global(volatile __global int8_t *p, int8_t val) {
  const uintptr_t addr = (uintptr_t)p;
  // Bit position of our byte inside the aligned word.
  const int bit_pos = (addr & 3) * 8;
  volatile __global int32_t *word = (volatile __global int32_t*)(addr & ~0x3);
  // Zero bits outside our lane leave the neighbouring bytes unchanged.
  return atomic_or_i32_global(word, (uint8_t)val << bit_pos) >> bit_pos;
}
// Atomically ORs val into the __local byte *p via a 32-bit atomic OR on the
// aligned word containing it, and returns the byte's previous value.
SCALAR_FUN_ATTR int8_t atomic_or_i8_shared(volatile __local int8_t *p, int8_t val) {
  const uintptr_t addr = (uintptr_t)p;
  // Bit position of our byte inside the aligned word.
  const int bit_pos = (addr & 3) * 8;
  volatile __local int32_t *word = (volatile __local int32_t*)(addr & ~0x3);
  // Zero bits outside our lane leave the neighbouring bytes unchanged.
  return atomic_or_i32_shared(word, (uint8_t)val << bit_pos) >> bit_pos;
}
// Atomically XORs val into the __global byte *p via a 32-bit atomic XOR on
// the aligned word containing it, and returns the byte's previous value.
SCALAR_FUN_ATTR int8_t atomic_xor_i8_global(volatile __global int8_t *p, int8_t val) {
  const uintptr_t addr = (uintptr_t)p;
  // Bit position of our byte inside the aligned word.
  const int bit_pos = (addr & 3) * 8;
  volatile __global int32_t *word = (volatile __global int32_t*)(addr & ~0x3);
  // Zero bits outside our lane leave the neighbouring bytes unchanged.
  return atomic_xor_i32_global(word, (uint8_t)val << bit_pos) >> bit_pos;
}
// Atomically XORs val into the __local byte *p via a 32-bit atomic XOR on
// the aligned word containing it, and returns the byte's previous value.
SCALAR_FUN_ATTR int8_t atomic_xor_i8_shared(volatile __local int8_t *p, int8_t val) {
  const uintptr_t addr = (uintptr_t)p;
  // Bit position of our byte inside the aligned word.
  const int bit_pos = (addr & 3) * 8;
  volatile __local int32_t *word = (volatile __local int32_t*)(addr & ~0x3);
  // Zero bits outside our lane leave the neighbouring bytes unchanged.
  return atomic_xor_i32_shared(word, (uint8_t)val << bit_pos) >> bit_pos;
}
// End of atomics8.h
|