1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388
|
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _LINUX_SIX_H
#define _LINUX_SIX_H
/**
* DOC: SIX locks overview
*
* Shared/intent/exclusive locks: sleepable read/write locks, like rw semaphores
* but with an additional state: read/shared, intent, exclusive/write
*
* The purpose of the intent state is to allow for greater concurrency on tree
* structures without deadlocking. In general, a read can't be upgraded to a
* write lock without deadlocking, so an operation that updates multiple nodes
* will have to take write locks for the full duration of the operation.
*
* But by adding an intent state, which is exclusive with other intent locks but
* not with readers, we can take intent locks at the start of the operation,
* and then take write locks only for the actual update to each individual
* node, without deadlocking.
*
* Example usage:
* six_lock_read(&foo->lock);
* six_unlock_read(&foo->lock);
*
* An intent lock must be held before taking a write lock:
* six_lock_intent(&foo->lock);
* six_lock_write(&foo->lock);
* six_unlock_write(&foo->lock);
* six_unlock_intent(&foo->lock);
*
* Other operations:
* six_trylock_read()
* six_trylock_intent()
* six_trylock_write()
*
* six_lock_downgrade() convert from intent to read
* six_lock_tryupgrade() attempt to convert from read to intent, may fail
*
* There are also interfaces that take the lock type as an enum:
*
* six_lock_type(&foo->lock, SIX_LOCK_read);
* six_trylock_convert(&foo->lock, SIX_LOCK_read, SIX_LOCK_intent)
* six_lock_type(&foo->lock, SIX_LOCK_write);
* six_unlock_type(&foo->lock, SIX_LOCK_write);
* six_unlock_type(&foo->lock, SIX_LOCK_intent);
*
* Lock sequence numbers - unlock(), relock():
*
* Locks embed sequence numbers, which are incremented on write lock/unlock.
* This allows locks to be dropped and then retaken iff the state they protect
* hasn't changed; this makes it much easier to avoid holding locks while e.g.
* doing IO or allocating memory.
*
* Example usage:
* six_lock_read(&foo->lock);
* u32 seq = six_lock_seq(&foo->lock);
* six_unlock_read(&foo->lock);
*
* some_operation_that_may_block();
*
* if (six_relock_read(&foo->lock, seq)) { ... }
*
* If the relock operation succeeds, it is as if the lock was never unlocked.
*
* Reentrancy:
*
* Six locks are not by themselves reentrant, but have counters for both the
* read and intent states that can be used to provide reentrancy by an upper
* layer that tracks held locks. If a lock is known to already be held in the
* read or intent state, six_lock_increment() can be used to bump the "lock
* held in this state" counter, increasing the number of unlock calls that
* will be required to fully unlock it.
*
* Example usage:
* six_lock_read(&foo->lock);
* six_lock_increment(&foo->lock, SIX_LOCK_read);
* six_unlock_read(&foo->lock);
* six_unlock_read(&foo->lock);
* foo->lock is now fully unlocked.
*
* Since the intent state supersedes read, it's legal to increment the read
* counter when holding an intent lock, but not the reverse.
*
* A lock may only be held once for write: six_lock_increment(.., SIX_LOCK_write)
* is not legal.
*
* should_sleep_fn:
*
* There is a six_lock() variant that takes a function pointer that is called
* immediately prior to schedule() when blocking, and may return an error to
* abort.
*
* One possible use for this feature is when objects being locked are part of
* a cache and may be reused, and lock ordering is based on a property of the
* object that will change when the object is reused - i.e. logical key order.
*
* If looking up an object in the cache may race with object reuse, and lock
* ordering is required to prevent deadlock, object reuse may change the
* correct lock order for that object and cause a deadlock. should_sleep_fn
* can be used to check if the object is still the object we want and avoid
* this deadlock.
*
* Wait list entry interface:
*
* There is a six_lock() variant, six_lock_waiter(), that takes a pointer to a
* wait list entry. By embedding six_lock_waiter into another object, and by
* traversing lock waitlists, it is then possible for an upper layer to
* implement full cycle detection for deadlock avoidance.
*
* should_sleep_fn should be used for invoking the cycle detector, walking the
* graph of held locks to check for a deadlock. The upper layer must track
* held locks for each thread, and each thread's held locks must be reachable
* from its six_lock_waiter object.
*
* six_lock_waiter() will add the wait object to the waitlist before re-trying
* taking the lock, and before calling should_sleep_fn, and the wait object will
* not be removed from the waitlist until either the lock has been successfully
* acquired, or we aborted because should_sleep_fn returned an error.
*
* Also, six_lock_waiter contains a timestamp, and waiters on a waitlist will
* have timestamps in strictly ascending order - this is so the timestamp can
* be used as a cursor for lock graph traversal.
*/
#include <linux/lockdep.h>
#include <linux/sched.h>
#include <linux/types.h>
/*
* The states a six lock can be taken in; see the "SIX locks overview" DOC
* comment at the top of this header for how they interact.
*/
enum six_lock_type {
/* read/shared: not exclusive with an intent lock */
SIX_LOCK_read,
/* intent: exclusive with other intent locks, but not with readers */
SIX_LOCK_intent,
/* exclusive/write: an intent lock must be held before taking it */
SIX_LOCK_write,
};
/*
* The lock object itself. Initialize with six_lock_init() and tear down with
* six_lock_exit().
*
* Only the fields whose meaning is stated elsewhere in this header are
* documented as fact; the rest are marked for review because their exact
* semantics live in the implementation, which is not visible from here.
*/
struct six_lock {
/* NOTE(review): presumably the packed held/waiting state - confirm in six.c */
atomic_t state;
/* sequence number, incremented on write lock/unlock; read via six_lock_seq() */
u32 seq;
/* NOTE(review): presumably extra intent holds added by six_lock_increment() - confirm */
unsigned intent_lock_recurse;
/* NOTE(review): purpose not evident from this header - confirm in six.c */
unsigned write_lock_recurse;
/* NOTE(review): presumably the task currently owning the lock - confirm */
struct task_struct *owner;
/* NOTE(review): per-CPU counters, presumably used with SIX_LOCK_INIT_PCPU - confirm */
unsigned __percpu *readers;
/* NOTE(review): presumably protects wait_list - confirm */
raw_spinlock_t wait_lock;
/* the lock's waitlist; wait objects passed to six_lock_waiter() are added to it */
struct list_head wait_list;
#ifdef CONFIG_DEBUG_LOCK_ALLOC
/* lockdep tracking state, present only with CONFIG_DEBUG_LOCK_ALLOC */
struct lockdep_map dep_map;
#endif
};
/*
* Wait list entry, passed to six_lock_waiter() / six_lock_ip_waiter() and
* added to the lock's waitlist while the caller is blocked. It may be embedded
* in another object so that an upper layer can walk waitlists for deadlock
* cycle detection.
*/
struct six_lock_waiter {
/* NOTE(review): presumably the linkage onto six_lock.wait_list - confirm */
struct list_head list;
/* NOTE(review): presumably the task that is waiting - confirm */
struct task_struct *task;
/* NOTE(review): presumably the lock type this waiter is trying to take - confirm */
enum six_lock_type lock_want;
/* NOTE(review): presumably set once the lock has been obtained for this waiter - confirm */
bool lock_acquired;
/*
* Timestamp; waiters on a waitlist have timestamps in strictly ascending
* order, so it can be used as a cursor when traversing the lock graph.
*/
u64 start_time;
};
/*
* Type of the should_sleep_fn callback accepted by the blocking lock
* functions: it is run after the caller has been added to the waitlist,
* immediately prior to scheduling, and may return an error to abort the lock
* attempt. The void * argument is the caller's @p, passed through unchanged.
*/
typedef int (*six_lock_should_sleep_fn)(struct six_lock *lock, void *);
/*
* Counterpart to six_lock_init().
* NOTE(review): presumably releases resources set up at init time - confirm.
*/
void six_lock_exit(struct six_lock *lock);
/* Flags accepted by six_lock_init() / __six_lock_init(). */
enum six_lock_init_flags {
/* NOTE(review): presumably selects the per-CPU reader counters (six_lock.readers) - confirm */
SIX_LOCK_INIT_PCPU = 1U << 0,
};
/*
* Out-of-line initializer behind the six_lock_init() macro. The macro supplies
* @name (the stringified lock expression) and @key (a static lock_class_key
* declared at the call site); @flags and @gfp are forwarded from the caller.
*/
void __six_lock_init(struct six_lock *lock, const char *name,
struct lock_class_key *key, enum six_lock_init_flags flags,
gfp_t gfp);
/**
 * six_lock_init - initialize a six lock
 * @lock:	lock to initialize
 * @flags:	optional flags, i.e. SIX_LOCK_INIT_PCPU
 * @gfp:	gfp_t value forwarded unchanged to __six_lock_init()
 *
 * Each expansion declares its own static struct lock_class_key and hands it,
 * together with the stringified @lock expression as the name, to
 * __six_lock_init().
 *
 * Every argument is evaluated exactly once.
 */
#define six_lock_init(lock, flags, gfp)					\
do {									\
	static struct lock_class_key __key;				\
									\
	__six_lock_init((lock), #lock, &__key, (flags), (gfp));	\
} while (0)
/**
 * six_lock_seq - read a lock's current sequence number
 * @lock:	six_lock whose sequence number is wanted
 *
 * @lock should be held for read or intent, and not for write.
 *
 * Saving the sequence number lets the caller drop @lock (typically around
 * some blocking operation) and later attempt to relock it. The relock
 * succeeds only if the sequence number is unchanged, which means no write
 * lock was taken in the meantime and the state @lock protects is still valid.
 *
 * Return: the current sequence number of @lock.
 */
static inline u32 six_lock_seq(const struct six_lock *lock)
{
	const u32 cur_seq = lock->seq;

	return cur_seq;
}
bool six_trylock_ip(struct six_lock *lock, enum six_lock_type type,
		    unsigned long ip);

/**
 * six_trylock_type - try to acquire a six lock, never blocking
 * @lock:	lock to take
 * @type:	SIX_LOCK_read, SIX_LOCK_intent, or SIX_LOCK_write
 *
 * Forwards to six_trylock_ip(), supplying _THIS_IP_ as the ip argument.
 *
 * Return: true on success, false on failure.
 */
static inline bool six_trylock_type(struct six_lock *lock, enum six_lock_type type)
{
	const unsigned long ip = _THIS_IP_;

	return six_trylock_ip(lock, type, ip);
}
int six_lock_ip_waiter(struct six_lock *lock, enum six_lock_type type,
		       struct six_lock_waiter *wait,
		       six_lock_should_sleep_fn should_sleep_fn, void *p,
		       unsigned long ip);

/**
 * six_lock_waiter - take a lock using a caller-provided wait list entry
 * @lock:		lock to take
 * @type:		SIX_LOCK_read, SIX_LOCK_intent, or SIX_LOCK_write
 * @wait:		wait object, which will be added to the lock's waitlist
 * @should_sleep_fn:	callback run after adding to the waitlist, immediately
 *			prior to scheduling
 * @p:			passed through to @should_sleep_fn
 *
 * Convenience wrapper around six_lock_ip_waiter() that supplies _THIS_IP_ as
 * the ip argument; see that function for the full documentation.
 *
 * Return: 0 on success, or the return code from @should_sleep_fn on failure.
 */
static inline int six_lock_waiter(struct six_lock *lock, enum six_lock_type type,
				  struct six_lock_waiter *wait,
				  six_lock_should_sleep_fn should_sleep_fn, void *p)
{
	const unsigned long ip = _THIS_IP_;

	return six_lock_ip_waiter(lock, type, wait, should_sleep_fn, p, ip);
}
/**
 * six_lock_ip - take a six lock
 * @lock:		lock to take
 * @type:		SIX_LOCK_read, SIX_LOCK_intent, or SIX_LOCK_write
 * @should_sleep_fn:	callback run after adding to the waitlist, immediately
 *			prior to scheduling
 * @p:			passed through to @should_sleep_fn
 * @ip:			ip parameter for lockdep/lockstat, i.e. _THIS_IP_
 *
 * Provides an on-stack wait list entry and forwards to six_lock_ip_waiter().
 *
 * Return: 0 on success, or the return code from @should_sleep_fn on failure.
 */
static inline int six_lock_ip(struct six_lock *lock, enum six_lock_type type,
			      six_lock_should_sleep_fn should_sleep_fn, void *p,
			      unsigned long ip)
{
	struct six_lock_waiter stack_waiter;

	return six_lock_ip_waiter(lock, type, &stack_waiter, should_sleep_fn, p, ip);
}
/**
 * six_lock_type - take a six lock
 * @lock:		lock to take
 * @type:		SIX_LOCK_read, SIX_LOCK_intent, or SIX_LOCK_write
 * @should_sleep_fn:	callback run after adding to the waitlist, immediately
 *			prior to scheduling
 * @p:			passed through to @should_sleep_fn
 *
 * Same as six_lock_ip(), with _THIS_IP_ supplied as the ip argument.
 *
 * Return: 0 on success, or the return code from @should_sleep_fn on failure.
 */
static inline int six_lock_type(struct six_lock *lock, enum six_lock_type type,
				six_lock_should_sleep_fn should_sleep_fn, void *p)
{
	/* six_lock_ip() supplies the on-stack waiter and calls six_lock_ip_waiter() */
	return six_lock_ip(lock, type, should_sleep_fn, p, _THIS_IP_);
}
bool six_relock_ip(struct six_lock *lock, enum six_lock_type type,
unsigned seq, unsigned long ip);
/**
* six_relock_type - attempt to re-take a lock that was held previously
* @lock: lock to take
* @type: SIX_LOCK_read, SIX_LOCK_intent, or SIX_LOCK_write
* @seq: lock sequence number obtained from six_lock_seq() while lock was
* held previously
*
* Return: true on success, false on failure.
*/
static inline bool six_relock_type(struct six_lock *lock, enum six_lock_type type,
unsigned seq)
{
return six_relock_ip(lock, type, seq, _THIS_IP_);
}
void six_unlock_ip(struct six_lock *lock, enum six_lock_type type,
		   unsigned long ip);

/**
 * six_unlock_type - drop a six lock
 * @lock:	lock to unlock
 * @type:	SIX_LOCK_read, SIX_LOCK_intent, or SIX_LOCK_write
 *
 * When a lock is held multiple times (because six_lock_increment() was used),
 * this decrements the 'lock held' counter by one.
 *
 * For example:
 *	six_lock_read(&foo->lock, fn, p);			read count 1
 *	six_lock_increment(&foo->lock, SIX_LOCK_read);		read count 2
 *	six_unlock_type(&foo->lock, SIX_LOCK_read);		read count 1
 *	six_unlock_type(&foo->lock, SIX_LOCK_read);		read count 0
 *
 * Forwards to six_unlock_ip(), supplying _THIS_IP_ as the ip argument.
 */
static inline void six_unlock_type(struct six_lock *lock, enum six_lock_type type)
{
	const unsigned long ip = _THIS_IP_;

	six_unlock_ip(lock, type, ip);
}
/*
 * __SIX_LOCK() - generate the per-lock-type convenience wrappers
 *
 * Instantiated below for read, intent and write. Each expansion defines
 * static inline helpers with the lock type baked in:
 *
 *	six_trylock_ip_<type>(),	six_trylock_<type>()
 *	six_lock_ip_waiter_<type>(),	six_lock_ip_<type>(),	six_lock_<type>()
 *	six_relock_ip_<type>(),		six_relock_<type>()
 *	six_unlock_ip_<type>(),		six_unlock_<type>()
 *
 * The _ip variants take an explicit ip argument; the plain variants pass
 * _THIS_IP_.
 */
#define __SIX_LOCK(type)						\
static inline bool six_trylock_ip_##type(struct six_lock *lock,	\
					 unsigned long ip)		\
{									\
	return six_trylock_ip(lock, SIX_LOCK_##type, ip);		\
}									\
									\
static inline bool six_trylock_##type(struct six_lock *lock)		\
{									\
	return six_trylock_ip_##type(lock, _THIS_IP_);			\
}									\
									\
static inline int six_lock_ip_waiter_##type(struct six_lock *lock,	\
			struct six_lock_waiter *wait,			\
			six_lock_should_sleep_fn should_sleep_fn,	\
			void *p, unsigned long ip)			\
{									\
	return six_lock_ip_waiter(lock, SIX_LOCK_##type, wait,		\
				  should_sleep_fn, p, ip);		\
}									\
									\
static inline int six_lock_ip_##type(struct six_lock *lock,		\
			six_lock_should_sleep_fn should_sleep_fn,	\
			void *p, unsigned long ip)			\
{									\
	return six_lock_ip(lock, SIX_LOCK_##type, should_sleep_fn, p, ip);\
}									\
									\
static inline int six_lock_##type(struct six_lock *lock,		\
			six_lock_should_sleep_fn should_sleep_fn,	\
			void *p)					\
{									\
	return six_lock_ip_##type(lock, should_sleep_fn, p, _THIS_IP_);\
}									\
									\
static inline bool six_relock_ip_##type(struct six_lock *lock,		\
					u32 seq, unsigned long ip)	\
{									\
	return six_relock_ip(lock, SIX_LOCK_##type, seq, ip);		\
}									\
									\
static inline bool six_relock_##type(struct six_lock *lock, u32 seq)	\
{									\
	return six_relock_ip_##type(lock, seq, _THIS_IP_);		\
}									\
									\
static inline void six_unlock_ip_##type(struct six_lock *lock,		\
					unsigned long ip)		\
{									\
	six_unlock_ip(lock, SIX_LOCK_##type, ip);			\
}									\
									\
static inline void six_unlock_##type(struct six_lock *lock)		\
{									\
	six_unlock_ip_##type(lock, _THIS_IP_);				\
}

__SIX_LOCK(read)
__SIX_LOCK(intent)
__SIX_LOCK(write)
#undef __SIX_LOCK
/* Convert a held lock from intent to read. */
void six_lock_downgrade(struct six_lock *lock);

/* Attempt to convert a held lock from read to intent; may fail. */
bool six_lock_tryupgrade(struct six_lock *lock);

/*
 * Enum-typed conversion between lock states, e.g.
 * six_trylock_convert(&foo->lock, SIX_LOCK_read, SIX_LOCK_intent).
 * NOTE(review): parameter order (current state, then wanted state) is taken
 * from that usage example - confirm against the implementation.
 */
bool six_trylock_convert(struct six_lock *lock, enum six_lock_type from,
			 enum six_lock_type to);

/*
 * Bump the "lock held in this state" counter of a lock already known to be
 * held, increasing the number of unlock calls needed to fully unlock it.
 * Legal for SIX_LOCK_read and SIX_LOCK_intent; not legal for SIX_LOCK_write.
 */
void six_lock_increment(struct six_lock *lock, enum six_lock_type type);

/* NOTE(review): presumably wakes every waiter on @lock's waitlist - confirm. */
void six_lock_wakeup_all(struct six_lock *lock);
/*
 * Value returned by six_lock_counts().
 * NOTE(review): n[] has three slots, presumably one per enum six_lock_type
 * value (read, intent, write) - confirm against the implementation.
 */
struct six_lock_count {
	unsigned n[3];
};

/* Returns a struct six_lock_count for @lock, by value. */
struct six_lock_count six_lock_counts(struct six_lock *lock);

/*
 * NOTE(review): presumably adjusts @lock's reader count by @nr (which is
 * signed, so may be negative) - confirm against the implementation.
 */
void six_lock_readers_add(struct six_lock *lock, int nr);
#endif /* _LINUX_SIX_H */
|