1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293
|
//===-- tsan_clock.h --------------------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file is a part of ThreadSanitizer (TSan), a race detector.
//
//===----------------------------------------------------------------------===//
#ifndef TSAN_CLOCK_H
#define TSAN_CLOCK_H
#include "tsan_defs.h"
#include "tsan_dense_alloc.h"
namespace __tsan {
typedef DenseSlabAlloc<ClockBlock, 1 << 22, 1 << 10> ClockAlloc;
typedef DenseSlabAllocCache ClockCache;
// The clock that lives in sync variables (mutexes, atomics, etc).
class SyncClock {
public:
SyncClock();
~SyncClock();
uptr size() const;
// These are used only in tests.
u64 get(unsigned tid) const;
u64 get_clean(unsigned tid) const;
void Resize(ClockCache *c, uptr nclk);
void Reset(ClockCache *c);
void DebugDump(int(*printf)(const char *s, ...));
// Clock element iterator.
// Note: it iterates only over the table without regard to dirty entries.
class Iter {
public:
explicit Iter(SyncClock* parent);
Iter& operator++();
bool operator!=(const Iter& other);
ClockElem &operator*();
private:
SyncClock *parent_;
// [pos_, end_) is the current continuous range of clock elements.
ClockElem *pos_;
ClockElem *end_;
int block_; // Current number of second level block.
NOINLINE void Next();
};
Iter begin();
Iter end();
private:
friend class ThreadClock;
friend class Iter;
static const uptr kDirtyTids = 2;
struct Dirty {
u32 tid() const { return tid_ == kShortInvalidTid ? kInvalidTid : tid_; }
void set_tid(u32 tid) {
tid_ = tid == kInvalidTid ? kShortInvalidTid : tid;
}
u64 epoch : kClkBits;
private:
// Full kInvalidTid won't fit into Dirty::tid.
static const u64 kShortInvalidTid = (1ull << (64 - kClkBits)) - 1;
u64 tid_ : 64 - kClkBits; // kInvalidId if not active
};
static_assert(sizeof(Dirty) == 8, "Dirty is not 64bit");
unsigned release_store_tid_;
unsigned release_store_reused_;
Dirty dirty_[kDirtyTids];
// If size_ is 0, tab_ is nullptr.
// If size <= 64 (kClockCount), tab_ contains pointer to an array with
// 64 ClockElem's (ClockBlock::clock).
// Otherwise, tab_ points to an array with up to 127 u32 elements,
// each pointing to the second-level 512b block with 64 ClockElem's.
// Unused space in the first level ClockBlock is used to store additional
// clock elements.
// The last u32 element in the first level ClockBlock is always used as
// reference counter.
//
// See the following scheme for details.
// All memory blocks are 512 bytes (allocated from ClockAlloc).
// Clock (clk) elements are 64 bits.
// Idx and ref are 32 bits.
//
// tab_
// |
// \/
// +----------------------------------------------------+
// | clk128 | clk129 | ...unused... | idx1 | idx0 | ref |
// +----------------------------------------------------+
// | |
// | \/
// | +----------------+
// | | clk0 ... clk63 |
// | +----------------+
// \/
// +------------------+
// | clk64 ... clk127 |
// +------------------+
//
// Note: dirty entries, if active, always override what's stored in the clock.
ClockBlock *tab_;
u32 tab_idx_;
u16 size_;
u16 blocks_; // Number of second level blocks.
void Unshare(ClockCache *c);
bool IsShared() const;
bool Cachable() const;
void ResetImpl();
void FlushDirty();
uptr capacity() const;
u32 get_block(uptr bi) const;
void append_block(u32 idx);
ClockElem &elem(unsigned tid) const;
};
// The clock that lives in threads.
class ThreadClock {
public:
typedef DenseSlabAllocCache Cache;
explicit ThreadClock(unsigned tid, unsigned reused = 0);
u64 get(unsigned tid) const;
void set(ClockCache *c, unsigned tid, u64 v);
void set(u64 v);
void tick();
uptr size() const;
void acquire(ClockCache *c, SyncClock *src);
void releaseStoreAcquire(ClockCache *c, SyncClock *src);
void release(ClockCache *c, SyncClock *dst);
void acq_rel(ClockCache *c, SyncClock *dst);
void ReleaseStore(ClockCache *c, SyncClock *dst);
void ResetCached(ClockCache *c);
void NoteGlobalAcquire(u64 v);
void DebugReset();
void DebugDump(int(*printf)(const char *s, ...));
private:
static const uptr kDirtyTids = SyncClock::kDirtyTids;
// Index of the thread associated with he clock ("current thread").
const unsigned tid_;
const unsigned reused_; // tid_ reuse count.
// Current thread time when it acquired something from other threads.
u64 last_acquire_;
// Last time another thread has done a global acquire of this thread's clock.
// It helps to avoid problem described in:
// https://github.com/golang/go/issues/39186
// See test/tsan/java_finalizer2.cpp for a regression test.
// Note the failuire is _extremely_ hard to hit, so if you are trying
// to reproduce it, you may want to run something like:
// $ go get golang.org/x/tools/cmd/stress
// $ stress -p=64 ./a.out
//
// The crux of the problem is roughly as follows.
// A number of O(1) optimizations in the clocks algorithm assume proper
// transitive cumulative propagation of clock values. The AcquireGlobal
// operation may produce an inconsistent non-linearazable view of
// thread clocks. Namely, it may acquire a later value from a thread
// with a higher ID, but fail to acquire an earlier value from a thread
// with a lower ID. If a thread that executed AcquireGlobal then releases
// to a sync clock, it will spoil the sync clock with the inconsistent
// values. If another thread later releases to the sync clock, the optimized
// algorithm may break.
//
// The exact sequence of events that leads to the failure.
// - thread 1 executes AcquireGlobal
// - thread 1 acquires value 1 for thread 2
// - thread 2 increments clock to 2
// - thread 2 releases to sync object 1
// - thread 3 at time 1
// - thread 3 acquires from sync object 1
// - thread 3 increments clock to 2
// - thread 1 acquires value 2 for thread 3
// - thread 1 releases to sync object 2
// - sync object 2 clock has 1 for thread 2 and 2 for thread 3
// - thread 3 releases to sync object 2
// - thread 3 sees value 2 in the clock for itself
// and decides that it has already released to the clock
// and did not acquire anything from other threads after that
// (the last_acquire_ check in release operation)
// - thread 3 does not update the value for thread 2 in the clock from 1 to 2
// - thread 4 acquires from sync object 2
// - thread 4 detects a false race with thread 2
// as it should have been synchronized with thread 2 up to time 2,
// but because of the broken clock it is now synchronized only up to time 1
//
// The global_acquire_ value helps to prevent this scenario.
// Namely, thread 3 will not trust any own clock values up to global_acquire_
// for the purposes of the last_acquire_ optimization.
atomic_uint64_t global_acquire_;
// Cached SyncClock (without dirty entries and release_store_tid_).
// We reuse it for subsequent store-release operations without intervening
// acquire operations. Since it is shared (and thus constant), clock value
// for the current thread is then stored in dirty entries in the SyncClock.
// We host a reference to the table while it is cached here.
u32 cached_idx_;
u16 cached_size_;
u16 cached_blocks_;
// Number of active elements in the clk_ table (the rest is zeros).
uptr nclk_;
u64 clk_[kMaxTidInClock]; // Fixed size vector clock.
bool IsAlreadyAcquired(const SyncClock *src) const;
bool HasAcquiredAfterRelease(const SyncClock *dst) const;
void UpdateCurrentThread(ClockCache *c, SyncClock *dst) const;
};
ALWAYS_INLINE u64 ThreadClock::get(unsigned tid) const {
DCHECK_LT(tid, kMaxTidInClock);
return clk_[tid];
}
ALWAYS_INLINE void ThreadClock::set(u64 v) {
DCHECK_GE(v, clk_[tid_]);
clk_[tid_] = v;
}
ALWAYS_INLINE void ThreadClock::tick() {
clk_[tid_]++;
}
ALWAYS_INLINE uptr ThreadClock::size() const {
return nclk_;
}
ALWAYS_INLINE void ThreadClock::NoteGlobalAcquire(u64 v) {
// Here we rely on the fact that AcquireGlobal is protected by
// ThreadRegistryLock, thus only one thread at a time executes it
// and values passed to this function should not go backwards.
CHECK_LE(atomic_load_relaxed(&global_acquire_), v);
atomic_store_relaxed(&global_acquire_, v);
}
ALWAYS_INLINE SyncClock::Iter SyncClock::begin() {
return Iter(this);
}
ALWAYS_INLINE SyncClock::Iter SyncClock::end() {
return Iter(nullptr);
}
ALWAYS_INLINE uptr SyncClock::size() const {
return size_;
}
ALWAYS_INLINE SyncClock::Iter::Iter(SyncClock* parent)
: parent_(parent)
, pos_(nullptr)
, end_(nullptr)
, block_(-1) {
if (parent)
Next();
}
ALWAYS_INLINE SyncClock::Iter& SyncClock::Iter::operator++() {
pos_++;
if (UNLIKELY(pos_ >= end_))
Next();
return *this;
}
ALWAYS_INLINE bool SyncClock::Iter::operator!=(const SyncClock::Iter& other) {
return parent_ != other.parent_;
}
ALWAYS_INLINE ClockElem &SyncClock::Iter::operator*() {
return *pos_;
}
} // namespace __tsan
#endif // TSAN_CLOCK_H
|