1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282
|
/* Copyright (C) 2011-2015 Free Software Foundation, Inc.
Contributed by Torvald Riegel <triegel@redhat.com>.
This file is part of the GNU Transactional Memory Library (libitm).
Libitm is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3 of the License, or
(at your option) any later version.
Libitm is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for
more details.
Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.
You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
#include "libitm_i.h"
#include "futex.h"
#include <limits.h>
namespace GTM HIDDEN {
// Acquire a RW lock for reading.
void
gtm_rwlock::read_lock (gtm_thread *tx)
{
for (;;)
{
// Fast path: first announce our intent to read, then check for
// conflicting intents to write. The fence ensures that this happens
// in exactly this order.
tx->shared_state.store (0, memory_order_relaxed);
atomic_thread_fence (memory_order_seq_cst);
if (likely (writers.load (memory_order_relaxed) == 0))
return;
// There seems to be an active, waiting, or confirmed writer, so enter
// the futex-based slow path.
// Before waiting, we clear our read intent check whether there are any
// writers that might potentially wait for readers. If so, wake them.
// We need the barrier here for the same reason that we need it in
// read_unlock().
// TODO Potentially too many wake-ups. See comments in read_unlock().
tx->shared_state.store (-1, memory_order_relaxed);
atomic_thread_fence (memory_order_seq_cst);
if (writer_readers.load (memory_order_relaxed) > 0)
{
writer_readers.store (0, memory_order_relaxed);
futex_wake(&writer_readers, 1);
}
// Signal that there are waiting readers and wait until there is no
// writer anymore.
// TODO Spin here on writers for a while. Consider whether we woke
// any writers before?
while (writers.load (memory_order_relaxed))
{
// An active writer. Wait until it has finished. To avoid lost
// wake-ups, we need to use Dekker-like synchronization.
// Note that we cannot reset readers to zero when we see that there
// are no writers anymore after the barrier because this pending
// store could then lead to lost wake-ups at other readers.
readers.store (1, memory_order_relaxed);
atomic_thread_fence (memory_order_seq_cst);
if (writers.load (memory_order_relaxed))
futex_wait(&readers, 1);
else
{
// There is no writer, actually. However, we can have enabled
// a futex_wait in other readers by previously setting readers
// to 1, so we have to wake them up because there is no writer
// that will do that. We don't know whether the wake-up is
// really necessary, but we can get lost wake-up situations
// otherwise.
// No additional barrier nor a nonrelaxed load is required due
// to coherency constraints. write_unlock() checks readers to
// see if any wake-up is necessary, but it is not possible that
// a reader's store prevents a required later writer wake-up;
// If the waking reader's store (value 0) is in modification
// order after the waiting readers store (value 1), then the
// latter will have to read 0 in the futex due to coherency
// constraints and the happens-before enforced by the futex
// (paragraph 6.10 in the standard, 6.19.4 in the Batty et al
// TR); second, the writer will be forced to read in
// modification order too due to Dekker-style synchronization
// with the waiting reader (see write_unlock()).
// ??? Can we avoid the wake-up if readers is zero (like in
// write_unlock())? Anyway, this might happen too infrequently
// to improve performance significantly.
readers.store (0, memory_order_relaxed);
futex_wake(&readers, INT_MAX);
}
}
// And we try again to acquire a read lock.
}
}
// Acquire a RW lock for writing. Generic version that also works for
// upgrades.
// Note that an upgrade might fail (and thus waste previous work done during
// this transaction) if there is another thread that tried to go into serial
// mode earlier (i.e., upgrades do not have higher priority than pure writers).
// However, this seems rare enough to not consider it further as we need both
// a non-upgrade writer and a writer to happen to switch to serial mode
// concurrently. If we'd want to handle this, a writer waiting for readers
// would have to coordinate with later arriving upgrades and hand over the
// lock to them, including the the reader-waiting state. We can try to support
// this if this will actually happen often enough in real workloads.
bool
gtm_rwlock::write_lock_generic (gtm_thread *tx)
{
// Try to acquire the write lock.
int w = 0;
if (unlikely (!writers.compare_exchange_strong (w, 1)))
{
// If this is an upgrade, we must not wait for other writers or
// upgrades.
if (tx != 0)
return false;
// There is already a writer. If there are no other waiting writers,
// switch to contended mode. We need seq_cst memory order to make the
// Dekker-style synchronization work.
if (w != 2)
w = writers.exchange (2);
while (w != 0)
{
futex_wait(&writers, 2);
w = writers.exchange (2);
}
}
// We have acquired the writer side of the R/W lock. Now wait for any
// readers that might still be active.
// We don't need an extra barrier here because the CAS and the xchg
// operations have full barrier semantics already.
// TODO In the worst case, this requires one wait/wake pair for each
// active reader. Reduce this!
for (gtm_thread *it = gtm_thread::list_of_threads; it != 0;
it = it->next_thread)
{
if (it == tx)
continue;
// Use a loop here to check reader flags again after waiting.
while (it->shared_state.load (memory_order_relaxed)
!= ~(typeof it->shared_state)0)
{
// An active reader. Wait until it has finished. To avoid lost
// wake-ups, we need to use Dekker-like synchronization.
// Note that we can reset writer_readers to zero when we see after
// the barrier that the reader has finished in the meantime;
// however, this is only possible because we are the only writer.
// TODO Spin for a while on this reader flag.
writer_readers.store (1, memory_order_relaxed);
atomic_thread_fence (memory_order_seq_cst);
if (it->shared_state.load (memory_order_relaxed)
!= ~(typeof it->shared_state)0)
futex_wait(&writer_readers, 1);
else
writer_readers.store (0, memory_order_relaxed);
}
}
return true;
}
// Acquire a RW lock for writing.
void
gtm_rwlock::write_lock ()
{
write_lock_generic (0);
}
// Upgrade a RW lock that has been locked for reading to a writing lock.
// Do this without possibility of another writer incoming. Return false
// if this attempt fails (i.e. another thread also upgraded).
bool
gtm_rwlock::write_upgrade (gtm_thread *tx)
{
return write_lock_generic (tx);
}
// Has to be called iff the previous upgrade was successful and after it is
// safe for the transaction to not be marked as a reader anymore.
void
gtm_rwlock::write_upgrade_finish (gtm_thread *tx)
{
// We are not a reader anymore. This is only safe to do after we have
// acquired the writer lock.
tx->shared_state.store (-1, memory_order_release);
}
// Release a RW lock from reading.
void
gtm_rwlock::read_unlock (gtm_thread *tx)
{
// We only need release memory order here because of privatization safety
// (this ensures that marking the transaction as inactive happens after
// any prior data accesses by this transaction, and that neither the
// compiler nor the hardware order this store earlier).
// ??? We might be able to avoid this release here if the compiler can't
// merge the release fence with the subsequent seq_cst fence.
tx->shared_state.store (-1, memory_order_release);
// If there is a writer waiting for readers, wake it up. We need the fence
// to avoid lost wake-ups. Furthermore, the privatization safety
// implementation in gtm_thread::try_commit() relies on the existence of
// this seq_cst fence.
// ??? We might not be the last active reader, so the wake-up might happen
// too early. How do we avoid this without slowing down readers too much?
// Each reader could scan the list of txns for other active readers but
// this can result in many cache misses. Use combining instead?
// TODO Sends out one wake-up for each reader in the worst case.
atomic_thread_fence (memory_order_seq_cst);
if (unlikely (writer_readers.load (memory_order_relaxed) > 0))
{
// No additional barrier needed here (see write_unlock()).
writer_readers.store (0, memory_order_relaxed);
futex_wake(&writer_readers, 1);
}
}
// Release a RW lock from writing.
void
gtm_rwlock::write_unlock ()
{
// This needs to have seq_cst memory order.
if (writers.fetch_sub (1) == 2)
{
// There might be waiting writers, so wake them.
writers.store (0, memory_order_relaxed);
if (futex_wake(&writers, 1) == 0)
{
// If we did not wake any waiting writers, we might indeed be the
// last writer (this can happen because write_lock_generic()
// exchanges 0 or 1 to 2 and thus might go to contended mode even if
// no other thread holds the write lock currently). Therefore, we
// have to wake up readers here as well. Execute a barrier after
// the previous relaxed reset of writers (Dekker-style), and fall
// through to the normal reader wake-up code.
atomic_thread_fence (memory_order_seq_cst);
}
else
return;
}
// No waiting writers, so wake up all waiting readers.
// Because the fetch_and_sub is a full barrier already, we don't need
// another barrier here (as in read_unlock()).
if (readers.load (memory_order_relaxed) > 0)
{
// No additional barrier needed here. The previous load must be in
// modification order because of the coherency constraints. Late stores
// by a reader are not a problem because readers do Dekker-style
// synchronization on writers.
readers.store (0, memory_order_relaxed);
futex_wake(&readers, INT_MAX);
}
}
} // namespace GTM
|