// SPDX-FileCopyrightText: 2009 Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
// SPDX-FileCopyrightText: 2009 Paul E. McKenney, IBM Corporation.
//
// SPDX-License-Identifier: LGPL-2.1-or-later
#ifndef _URCU_POINTER_STATIC_H
#define _URCU_POINTER_STATIC_H
/*
* Userspace RCU header. Operations on pointers.
*
* TO BE INCLUDED ONLY IN CODE THAT IS TO BE RECOMPILED ON EACH LIBURCU
* RELEASE. See urcu.h for linking dynamically with the userspace rcu library.
*
* IBM's contributions to this file may be relicensed under LGPLv2 or later.
*/
#include <urcu/compiler.h>
#include <urcu/arch.h>
#include <urcu/system.h>
#include <urcu/uatomic.h>
#include <urcu/annotate.h>
#ifdef __cplusplus
extern "C" {
#endif
/**
 * _rcu_dereference - reads (copies) an RCU-protected pointer into a local
 * variable from within an RCU read-side critical section. The pointer can
 * later be safely dereferenced within the critical section.
*
 * This ensures that the pointer copy is invariant throughout the whole critical
* section.
*
* Inserts memory barriers on architectures that require them (currently only
* Alpha) and documents which pointers are protected by RCU.
*
* With C standards prior to C11/C++11, the compiler memory barrier in
* CMM_LOAD_SHARED() ensures that value-speculative optimizations (e.g.
 * VSS: Value Speculation Scheduling) do not perform the data read
* before the pointer read by speculating the value of the pointer.
* Correct ordering is ensured because the pointer is read as a volatile
* access. This acts as a global side-effect operation, which forbids
* reordering of dependent memory operations.
*
* With C standards C11/C++11, concerns about dependency-breaking
* optimizations are taken care of by the "memory_order_consume" atomic
* load.
*
 * The gcc __atomic_load() built-in is used rather than the explicit
 * C11/C++11 atomic load because the input argument is a plain pointer,
 * not an _Atomic type as required by C11/C++11.
*
* By defining URCU_DEREFERENCE_USE_VOLATILE, the user requires use of
* a volatile memory_order_relaxed atomic load to implement
* rcu_dereference rather than memory_order_consume load from the
* C11/C++11 standards.
*
 * CAUTION: If URCU_DEREFERENCE_USE_VOLATILE is defined, pointer comparisons
 * _must_ be done using the `cmm_ptr_eq' static inline helper function to
 * prevent compiler optimizations from breaking address dependencies,
 * resulting in the loss of the publication guarantee.
*
 * Defining URCU_DEREFERENCE_USE_VOLATILE may improve performance on
 * weakly-ordered architectures where the compiler implements
 * memory_order_consume as memory_order_acquire, which is stricter than
 * required by the standard.
*
* Note that using volatile accesses for rcu_dereference may cause
* LTO to generate incorrectly ordered code starting from C11/C++11.
* (note: to be confirmed whether this statement also applies to
* volatile memory_order_relaxed atomic load).
*
* Should match rcu_assign_pointer() or rcu_xchg_pointer().
*
* This macro is less than 10 lines long. The intent is that this macro
* meets the 10-line criterion in LGPL, allowing this function to be
* expanded directly in non-LGPL code.
*/
#ifdef URCU_DEREFERENCE_USE_VOLATILE
# define _rcu_dereference(p)					\
	__extension__						\
	({							\
		__typeof__(p) *_________ptr = &(p);		\
		__typeof__(p) _________res =			\
			uatomic_load(_________ptr, CMM_RELAXED);\
		cmm_smp_read_barrier_depends();			\
		cmm_annotate_mem_acquire(_________ptr);		\
		(_________res);					\
	})
#else
# define _rcu_dereference(p) \
uatomic_load(&(p), CMM_CONSUME)
#endif
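/*
 * Illustrative usage sketch (not part of this header): a reader copies the
 * protected pointer once with the public rcu_dereference() and only
 * dereferences the local copy inside the critical section. The names
 * `global_ptr', `struct mystruct' and `use()' are hypothetical, for
 * illustration only.
 *
 *	struct mystruct *p;
 *
 *	rcu_read_lock();
 *	p = rcu_dereference(global_ptr);
 *	if (p)
 *		use(p->field);	// safe: p cannot be reclaimed here
 *	rcu_read_unlock();
 */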
/**
* Compare pointers A and B, but prevent the compiler from using this knowledge
* for optimization.
*
 * This may introduce some overhead. For example, compilers might allocate
 * more registers, resulting in register spilling.
*
 * For example, in the following code, `cmm_ptr_eq' prevents the compiler from
 * re-using the dereference of `a' at the first call of `do_func()' for the
 * second call. If a simple equality comparison was used, the compiler could
 * re-use the value of `global->x' across the two RCU critical regions.
*
 *	struct work {
 *		long x;
 *	};
 *
 *	struct work *global;
 *
 *	void func(void)
 *	{
 *		struct work *a, *b;
 *
 *		rcu_read_lock();
 *		a = rcu_dereference(global);
 *		if (a)
 *			do_func(a->x);
 *		rcu_read_unlock();
 *
 *		some_stuff();
 *
 *		rcu_read_lock();
 *		b = rcu_dereference(global);
 *		if (b && cmm_ptr_eq(a, b)) // force use of b->x, not a->x
 *			do_func(b->x);
 *		rcu_read_unlock();
 *	}
*
 * Consider what could happen if such a compiler optimization was done while
 * the following mutator exists. The mutator removes a `work' node that was
 * previously stored in a global location by storing NULL. It then calls
 * synchronize_rcu() to ensure no readers can still see the node. From there,
 * new work is created for the node and the node is committed with
 * rcu_assign_pointer(), so the same pointer is visible to readers again. This
 * fictional mutator is purposely trying to create an ABA problem for the sake
 * of the argument:
*
 *	void mutator(void)
 *	{
 *		struct work *work;
 *
 *		work = global;
 *		rcu_assign_pointer(global, NULL);
 *		synchronize_rcu();
 *		work->x = new_work();
 *		rcu_assign_pointer(global, work);
 *	}
*
 * Then, the following execution could happen, where the newly assigned work is
 * never executed. In other words, compiler optimizations can introduce an ABA
 * problem, defeating one of RCU's purposes.
*
 *	Worker:                                 Mutator:
 *	let a, b                                let work = global
 *	let tmp
 *	rcu_read_lock()
 *	a = rcu_dereference(global)
 *	if (a) {
 *		tmp = a->x
 *		do_func(tmp)
 *	}
 *	rcu_read_unlock()
 *	do_stuff()                              ...
 *	...                                     rcu_assign_pointer(global, NULL)
 *	...                                     synchronize_rcu()
 *	...                                     work->x = new_work()
 *	...                                     rcu_assign_pointer(global, work)
 *	rcu_read_lock()
 *	b = rcu_dereference(global)
 *	if (b && (a == b))   // BUGGY !!!
 *		do_func(tmp)
 *	rcu_read_unlock()
*
* In other words, the publication guarantee is lost here. The store to
* `work->x' in the mutator is supposed to be observable by the second
* rcu_dereference() in the worker, because of the matching
* rcu_assign_pointer(). But due to the equality comparison, the compiler
* decides to reuse the prior rcu_dereference().
*/
static inline int cmm_ptr_eq(const void *a, const void *b)
{
_CMM_OPTIMIZER_HIDE_VAR(a);
_CMM_OPTIMIZER_HIDE_VAR(b);
return a == b;
}
/**
 * _rcu_cmpxchg_pointer - same as rcu_assign_pointer, but tests if the pointer
 * is as expected by "old". If it succeeds, returns the previous pointer to the
 * data structure, which can be safely freed after waiting for a quiescent
 * state using synchronize_rcu(). If it fails (unexpected value), returns the
 * current pointer (which must not be freed!).
*
 * uatomic_cmpxchg_mo() acts as both release and acquire barriers on success.
*
* This macro is less than 10 lines long. The intent is that this macro
* meets the 10-line criterion in LGPL, allowing this function to be
* expanded directly in non-LGPL code.
*/
#define _rcu_cmpxchg_pointer(p, old, _new) \
__extension__ \
({ \
__typeof__(*p) _________pold = (old); \
__typeof__(*p) _________pnew = (_new); \
uatomic_cmpxchg_mo(p, _________pold, _________pnew, \
CMM_SEQ_CST, CMM_RELAXED); \
	})
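/*
 * Illustrative usage sketch (not part of this header): replace `old' with
 * `new' only if the published pointer still equals `old'; on success, reclaim
 * the old node after a grace period. `global_ptr' and the node type are
 * hypothetical names.
 *
 *	struct mystruct *ret;
 *
 *	ret = rcu_cmpxchg_pointer(&global_ptr, old, new);
 *	if (ret == old) {
 *		synchronize_rcu();
 *		free(old);	// no reader can still hold a reference
 *	}
 */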
/**
* _rcu_xchg_pointer - same as rcu_assign_pointer, but returns the previous
* pointer to the data structure, which can be safely freed after waiting for a
* quiescent state using synchronize_rcu().
*
 * uatomic_xchg_mo() acts as both release and acquire barriers.
*
* This macro is less than 10 lines long. The intent is that this macro
* meets the 10-line criterion in LGPL, allowing this function to be
* expanded directly in non-LGPL code.
*/
#define _rcu_xchg_pointer(p, v) \
__extension__ \
({ \
__typeof__(*p) _________pv = (v); \
uatomic_xchg_mo(p, _________pv, \
CMM_SEQ_CST); \
})
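/*
 * Illustrative usage sketch (not part of this header): unconditionally
 * unpublish a node and reclaim it after a grace period. `global_ptr' is a
 * hypothetical name.
 *
 *	struct mystruct *old;
 *
 *	old = rcu_xchg_pointer(&global_ptr, NULL);
 *	synchronize_rcu();
 *	free(old);
 */

/*
 * Storing a compile-time constant NULL publishes no new data, so no release
 * barrier is needed: a relaxed store suffices. All other stores use a release
 * store to order the initialization of the data structure before its
 * publication.
 */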
#define _rcu_set_pointer(p, v) \
do { \
__typeof__(*p) _________pv = (v); \
uatomic_store(p, _________pv, \
__builtin_constant_p(v) && (v) == NULL ? \
CMM_RELAXED : CMM_RELEASE); \
} while (0)
/**
* _rcu_assign_pointer - assign (publicize) a pointer to a new data structure
* meant to be read by RCU read-side critical sections. Returns the assigned
* value.
*
* Documents which pointers will be dereferenced by RCU read-side critical
* sections and adds the required memory barriers on architectures requiring
* them. It also makes sure the compiler does not reorder code initializing the
* data structure before its publication.
*
* Should match rcu_dereference().
*
* This macro is less than 10 lines long. The intent is that this macro
* meets the 10-line criterion in LGPL, allowing this function to be
* expanded directly in non-LGPL code.
*/
#define _rcu_assign_pointer(p, v) _rcu_set_pointer(&(p), v)
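/*
 * Illustrative usage sketch (not part of this header): initialize a new node
 * completely before publishing it; rcu_assign_pointer() orders the
 * initialization before the pointer store. `global_ptr' and `make_node()' are
 * hypothetical names.
 *
 *	struct mystruct *p;
 *
 *	p = make_node();
 *	p->field = 42;				// initialization...
 *	rcu_assign_pointer(global_ptr, p);	// ...ordered before publication
 */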
#ifdef __cplusplus
}
#endif
#endif /* _URCU_POINTER_STATIC_H */