// Start of atomics8.h
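//
// 8-bit atomic operations are generally not available natively on the
// GPU backends, so everything in this file is emulated on top of the
// 32-bit atomics (atomic_cmpxchg_i32_*, atomic_and_i32_*, ...): we
// find the 4-byte-aligned word containing the target byte, operate on
// the whole word, and mask off the other three bytes.
//
// Worked example of the address arithmetic used throughout: for a
// byte at address 0x1006, offset = 0x1006 & 3 = 2, hence shift = 16
// and mask = 0x00ff0000, and p32 points at the aligned word 0x1004.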

SCALAR_FUN_ATTR int8_t atomic_cmpxchg_i8_global(volatile __global int8_t *p,
                                                int8_t cmp, int8_t val);
SCALAR_FUN_ATTR int8_t atomic_cmpxchg_i8_shared(volatile __local int8_t *p,
                                                int8_t cmp, int8_t val);
SCALAR_FUN_ATTR int8_t atomic_add_i8_global(volatile __global int8_t *p, int8_t x);
SCALAR_FUN_ATTR int8_t atomic_add_i8_shared(volatile __local int8_t *p, int8_t x);
SCALAR_FUN_ATTR int8_t atomic_smax_i8_global(volatile __global int8_t *p, int8_t x);
SCALAR_FUN_ATTR int8_t atomic_smax_i8_shared(volatile __local int8_t *p, int8_t x);
SCALAR_FUN_ATTR int8_t atomic_smin_i8_global(volatile __global int8_t *p, int8_t x);
SCALAR_FUN_ATTR int8_t atomic_smin_i8_shared(volatile __local int8_t *p, int8_t x);
SCALAR_FUN_ATTR uint8_t atomic_umax_i8_global(volatile __global uint8_t *p, uint8_t x);
SCALAR_FUN_ATTR uint8_t atomic_umax_i8_shared(volatile __local uint8_t *p, uint8_t x);
SCALAR_FUN_ATTR uint8_t atomic_umin_i8_global(volatile __global uint8_t *p, uint8_t x);
SCALAR_FUN_ATTR uint8_t atomic_umin_i8_shared(volatile __local uint8_t *p, uint8_t x);
SCALAR_FUN_ATTR int8_t atomic_and_i8_global(volatile __global int8_t *p, int8_t x);
SCALAR_FUN_ATTR int8_t atomic_and_i8_shared(volatile __local int8_t *p, int8_t x);
SCALAR_FUN_ATTR int8_t atomic_or_i8_global(volatile __global int8_t *p, int8_t x);
SCALAR_FUN_ATTR int8_t atomic_or_i8_shared(volatile __local int8_t *p, int8_t x);
SCALAR_FUN_ATTR int8_t atomic_xor_i8_global(volatile __global int8_t *p, int8_t x);
SCALAR_FUN_ATTR int8_t atomic_xor_i8_shared(volatile __local int8_t *p, int8_t x);

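// Byte-level cmpxchg built from a CAS loop on the containing 32-bit
// word.  The first attempt optimistically guesses the full word from
// `cmp` alone; each time the 32-bit CAS fails, the word actually
// observed (`got`) becomes the next expected value, with only our
// byte replaced by `val`.  The previous value of the byte is
// returned, so callers can check it against `cmp`.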
SCALAR_FUN_ATTR int8_t atomic_cmpxchg_i8_global(volatile __global int8_t *p,
                                                int8_t cmp, int8_t val) {
  int offset = ((uintptr_t)p & 3);
  volatile __global int32_t *p32 = (volatile __global int32_t*)((uintptr_t)p & ~0x3);

  int shift = offset * 8;
  int32_t mask = 0xff << shift;
  int32_t shifted_val = val << shift;
  int32_t shifted_cmp = cmp << shift;

  uint32_t old = shifted_cmp;
  uint32_t upd = shifted_val;
  uint32_t got;

  while ((got=atomic_cmpxchg_i32_global(p32, old, upd)) != old) {
    old = got;
    upd = (old & ~mask) | shifted_val;
  }

  return old >> shift;
}

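// The same algorithm as the global variant above, but for __local
// (shared) memory.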
SCALAR_FUN_ATTR int8_t atomic_cmpxchg_i8_shared(volatile __local int8_t *p,
                                                int8_t cmp, int8_t val) {
  int offset = ((uintptr_t)p & 3);
  volatile __local int32_t *p32 = (volatile __local int32_t*)((uintptr_t)p & ~0x3);

  int shift = offset * 8;
  int32_t mask = 0xff << shift;
  int32_t shifted_val = val << shift;
  int32_t shifted_cmp = cmp << shift;

  uint32_t old = shifted_cmp;
  uint32_t upd = shifted_val;
  uint32_t got;

  while ((got=atomic_cmpxchg_i32_shared(p32, old, upd)) != old) {
    old = got;
    upd = (old & ~mask) | shifted_val;
  }

  return old >> shift;
}

// Convenience macro for the arithmetic operations.  Each use expands
// to a global and a shared variant, both CAS loops that read the
// containing word, apply `op` to the target byte, and retry until the
// word is unchanged underneath them.  For example,
// DEFINE_8BIT_ATOMIC(add, int8_t, add8) defines atomic_add_i8_global
// and atomic_add_i8_shared.
#define DEFINE_8BIT_ATOMIC(name, T, op)                                 \
  SCALAR_FUN_ATTR T                                                     \
  atomic_##name##_i8_global(volatile __global T *p, T val) {            \
    int offset = ((uintptr_t)p & 3);                                    \
    volatile __global int32_t *p32 = (volatile __global int32_t*)((uintptr_t)p & ~0x3); \
    int shift = offset * 8;                                             \
    int32_t mask = 0xff << shift;                                       \
    int32_t old = 0;                                                    \
    int32_t upd = mask & ((op(old >> shift, val)) << shift);            \
    int32_t saw;                                                        \
    while ((saw=atomic_cmpxchg_i32_global(p32, old, upd)) != old) {     \
      old = saw;                                                        \
      upd = (old & ~mask) | ((op(old >> shift, val)) << shift);         \
    }                                                                   \
    return old >> shift;                                                \
  }                                                                     \
  SCALAR_FUN_ATTR T                                                     \
  atomic_##name##_i8_shared(volatile __local T *p, T val) {             \
    int offset = ((uintptr_t)p & 3);                                    \
    volatile __local int32_t *p32 = (volatile __local int32_t*)((uintptr_t)p & ~0x3); \
    int shift = offset * 8;                                             \
    int32_t mask = 0xff << shift;                                       \
    int32_t old = 0;                                                    \
    int32_t upd = mask & ((op(old >> shift, val)) << shift);            \
    int32_t saw;                                                        \
    while ((saw=atomic_cmpxchg_i32_shared(p32, old, upd)) != old) {     \
      old = saw;                                                        \
      upd = (old & ~mask) | ((op(old >> shift, val)) << shift);         \
    }                                                                   \
    return old >> shift;                                                \
  }

DEFINE_8BIT_ATOMIC(add, int8_t, add8);
DEFINE_8BIT_ATOMIC(smax, int8_t, smax8);
DEFINE_8BIT_ATOMIC(smin, int8_t, smin8);
DEFINE_8BIT_ATOMIC(umax, uint8_t, umax8);
DEFINE_8BIT_ATOMIC(umin, uint8_t, umin8);
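
// The operator arguments used above (add8, smax8, and so on) are not
// defined in this file; they are assumed to come from the scalar
// function definitions elsewhere in the RTS.  As a sketch, they are
// expected to behave like the following (illustration only, not
// compiled here):
//
//   SCALAR_FUN_ATTR int8_t add8(int8_t x, int8_t y)     { return x + y; }
//   SCALAR_FUN_ATTR int8_t smax8(int8_t x, int8_t y)    { return x < y ? y : x; }
//   SCALAR_FUN_ATTR int8_t smin8(int8_t x, int8_t y)    { return x < y ? x : y; }
//   SCALAR_FUN_ATTR uint8_t umax8(uint8_t x, uint8_t y) { return x < y ? y : x; }
//   SCALAR_FUN_ATTR uint8_t umin8(uint8_t x, uint8_t y) { return x < y ? x : y; }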

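// The bitwise operations need no CAS loop, because an identity
// operand can protect the neighbouring bytes: AND with 1-bits leaves
// a bit unchanged, so the operand is ~mask | (val << shift), while OR
// and XOR with 0-bits leave a bit unchanged, so shifting the
// zero-extended value into place suffices.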
SCALAR_FUN_ATTR int8_t atomic_and_i8_global(volatile __global int8_t *p, int8_t val) {
  volatile __global int32_t *p32 = (volatile __global int32_t*)((uintptr_t)p & ~0x3);
  int shift = ((uintptr_t)p & 3) * 8;
  int32_t mask = 0xff << shift;
  return atomic_and_i32_global(p32, ~mask | (val<<shift)) >> shift;
}

SCALAR_FUN_ATTR int8_t atomic_and_i8_shared(volatile __local int8_t *p, int8_t val) {
  volatile __local int32_t *p32 = (volatile __local int32_t*)((uintptr_t)p & ~0x3);
  int shift = ((uintptr_t)p & 3) * 8;
  int32_t mask = 0xff << shift;
  return atomic_and_i32_shared(p32, ~mask | (val<<shift)) >> shift;
}

SCALAR_FUN_ATTR int8_t atomic_or_i8_global(volatile __global int8_t *p, int8_t val) {
  volatile __global int32_t *p32 = (volatile __global int32_t*)((uintptr_t)p & ~0x3);
  int shift = ((uintptr_t)p & 3) * 8;
  return atomic_or_i32_global(p32, (uint8_t)val<<shift) >> shift;
}

SCALAR_FUN_ATTR int8_t atomic_or_i8_shared(volatile __local int8_t *p, int8_t val) {
  volatile __local int32_t *p32 = (volatile __local int32_t*)((uintptr_t)p & ~0x3);
  int shift = ((uintptr_t)p & 3) * 8;
  return atomic_or_i32_shared(p32, (uint8_t)val<<shift) >> shift;
}

SCALAR_FUN_ATTR int8_t atomic_xor_i8_global(volatile __global int8_t *p, int8_t val) {
  volatile __global int32_t *p32 = (volatile __global int32_t*)((uintptr_t)p & ~0x3);
  int shift = ((uintptr_t)p & 3) * 8;
  return atomic_xor_i32_global(p32, (uint8_t)val<<shift) >> shift;
}

SCALAR_FUN_ATTR int8_t atomic_xor_i8_shared(volatile __local int8_t *p, int8_t val) {
  volatile __local int32_t *p32 = (volatile __local int32_t*)((uintptr_t)p & ~0x3);
  int shift = ((uintptr_t)p & 3) * 8;
  return atomic_xor_i32_shared(p32, (uint8_t)val<<shift) >> shift;
}
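
// Usage sketch (illustration only; this hypothetical kernel is not
// part of the header).  A byte-sized histogram updated from an OpenCL
// kernel might look like:
//
//   __kernel void hist8(__global int8_t *bins, __global const int *keys) {
//     int i = get_global_id(0);
//     atomic_add_i8_global(&bins[keys[i]], 1);  // bump this key's bin
//   }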

// End of atomics8.h