1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341
|
/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* vim: set ts=8 sts=2 et sw=2 tw=80: */
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
/*
* Multiple Producer Single Consumer lock-free queue.
* Allocation-free is guaranteed outside of the constructor.
*
* This is a direct C++ port from
* https://docs.rs/signal-hook/0.3.17/src/signal_hook/low_level/channel.rs.html#1-235
* with the exception we are using atomic uint64t to have 15 slots in the ring
* buffer (Rust implem is 5 slots, we want a bit more).
* */
#ifndef mozilla_BoundedMPSCQueue_h
#define mozilla_BoundedMPSCQueue_h
#include "mozilla/Assertions.h"
#include <algorithm>
#include <atomic>
#include <cinttypes>
#include <cstddef>
#include <cstdint>
#include <memory>
#include <optional>
namespace mozilla {
static constexpr bool MPSC_DEBUG = false;
/**
* This data structure allows producing data from several threads, and consuming
* it on one thread, safely and without performing memory allocations or
* locking.
*
* The role for the producers and the consumer must be constant, i.e., the
* producer should always be on one thread and the consumer should always be on
* another thread.
*
* Some words about the inner workings of this class:
* - Capacity is fixed. Only one allocation is performed, in the constructor.
* - Maximum capacity is 15 elements, with 0 being used to denote an empty set.
* This is a hard limitation from encoding indexes within the atomic uint64_t.
* - This is lock-free but not wait-free, it might spin a little until
* compare/exchange succeeds.
* - There is no guarantee of forward progression for individual threads.
* - This should be safe to use from a signal handler context.
*/
template <typename T, size_t kCapacity>
class MPSCRingBufferBase {
static constexpr size_t kMaxCapacity = 16;
public:
explicit MPSCRingBufferBase() : mFree(0), mOccupied(0) {
static_assert(kCapacity < kMaxCapacity);
if constexpr (MPSC_DEBUG) {
fprintf(stderr,
"[%s] this=%p { mCapacity=%zu, mBits=%" PRIu64
", mMask=0x%" PRIx64 " }\n",
__PRETTY_FUNCTION__, this, 1 + kCapacity, mBits, mMask);
}
// Leave one empty space in the queue, used to distinguish an empty queue
// from a full one, as in the SPSCQueue.
// https://docs.rs/signal-hook/0.3.17/src/signal_hook/low_level/channel.rs.html#126
for (uint64_t i = 1; i < StorageCapacity(); ++i) {
MarkSlot(mFree, i);
}
// This should be the only allocation performed, thus it cannot be performed
// in a restricted context (e.g., signal handler, real-time thread)
mData = std::make_unique<T[]>(Capacity());
std::atomic_thread_fence(std::memory_order_seq_cst);
}
/**
* @brief Put an element in the queue. The caller MUST check the return value
* and maybe loop to try again (or drop if acceptable).
*
* First it attempts to acquire a slot (storage index) that is known to be
* non used. If that is not successful then 0 is returned. If that is
* successful, the slot is ours (it has been exclusively acquired) and data
* can be copied into the ring buffer at that index.
*
* @param aElement The element to put in the queue.
*
* @return 0 if insertion could not be performed, inserted index otherwise
*/
[[nodiscard]] int Send(T& aElement) {
std::optional<uint64_t> empty_idx = UnmarkSlot(mFree);
if (empty_idx.has_value()) {
std::move(&aElement, &aElement + 1, &mData[*empty_idx - 1]);
MarkSlot(mOccupied, *empty_idx);
return *empty_idx;
}
return 0;
}
/**
* Retrieve one element from the ring buffer, and copy it to
* `aElement`, if non-null.
*
* It attempts to acquire a slot from the list of used ones. If that is not
* successfull, then 0 is returned. Once a slot has been exclusively acquired,
* data is copied from it into the non-null pointer passed in parameter.
*
* @param aElement A pointer to a `T` where data will be copied.
*
* @return The index from which data was copied, 0 if there was nothing in the
* ring buffer.
*/
[[nodiscard]] int Recv(T* aElement) {
std::optional<uint64_t> idx = UnmarkSlot(mOccupied);
if (idx.has_value()) {
if (aElement) {
std::move(&mData[*idx - 1], &mData[*idx], aElement);
}
MarkSlot(mFree, *idx);
return *idx;
}
return 0;
}
constexpr size_t Capacity() const { return StorageCapacity() - 1; }
private:
/*
* Get/Set manipulates the encoding within `aNumber` by storing the index as a
* number and shifting it to the left (set) or right (get).
*
* Initial `aNumber` value is 0.
*
* Set() with first index value (1), we store the index on mBits and we shift
* it to the left, e.g., as follows:
*
* aNumber=0b00000000000000000000000000000000000000000000000000000000000000
* aIndex=0 aValue=1
* aNumber=0b00000000000000000000000000000000000000000000000000000000000001
* aIndex=1 aValue=33
* aNumber=0b00000000000000000000000000000000000000000000000000000000100001
* aIndex=2 aValue=801
* aNumber=0b00000000000000000000000000000000000000000000000000001100100001
* aIndex=3 aValue=17185
* aNumber=0b00000000000000000000000000000000000000000000000100001100100001
* aIndex=4 aValue=344865
* aNumber=0b00000000000000000000000000000000000000000001010100001100100001
* aIndex=5 aValue=6636321
* aNumber=0b00000000000000000000000000000000000000011001010100001100100001
* aIndex=6 aValue=124076833
* aNumber=0b00000000000000000000000000000000000111011001010100001100100001
* aIndex=7 aValue=2271560481
* aNumber=0b00000000000000000000000000000010000111011001010100001100100001
* aIndex=8 aValue=40926266145
* aNumber=0b00000000000000000000000000100110000111011001010100001100100001
* aIndex=9 aValue=728121033505
* aNumber=0b00000000000000000000001010100110000111011001010100001100100001
* aIndex=10 aValue=12822748939041
* aNumber=0b00000000000000000010111010100110000111011001010100001100100001
* aIndex=11 aValue=223928981472033
* aNumber=0b00000000000000110010111010100110000111011001010100001100100001
* aIndex=12 aValue=3883103678710561
* aNumber=0b00000000001101110010111010100110000111011001010100001100100001
* aIndex=13 aValue=66933498461897505
* aNumber=0b00000011101101110010111010100110000111011001010100001100100001
* aIndex=14 aValue=1147797409030816545
*/
[[nodiscard]] uint64_t Get(uint64_t aNumber, uint64_t aIndex) {
return (aNumber >> (mBits * aIndex)) & mMask;
}
[[nodiscard]] uint64_t Set(uint64_t aNumber, uint64_t aIndex,
uint64_t aValue) {
return (aNumber & ~(mMask << (mBits * aIndex))) |
(aValue << (mBits * aIndex));
}
/*
* Enqueue a value in the ring buffer at aIndex.
*
* Takes the current uint64_t value from the atomic and try to acquire a non
* used slot in the ring buffer. If unsuccessful, 0 is returned, otherwise
* compute the new atomic value that holds the new state of usage of the
* slots, and use compare/exchange to perform lock-free synchronization:
* compare/exchanges succeeds when the current value and the modified one are
* equal, reflecting an acquired lock. If another thread was concurrent to
* this one, then it would fail to that operation, and go into the next
* iteration of the loop to read the new state value from the atomic, and
* acquire a different slot.
*
* @param aSlotStatus a uint64_t atomic that is used to perform lock-free
* thread exclusions
*
* @param aIndex the index where we want to enqueue. It should come from the
* empty queue
* */
void MarkSlot(std::atomic<uint64_t>& aSlotStatus, uint64_t aIndex) {
uint64_t current = aSlotStatus.load(std::memory_order_relaxed);
do {
// Attempts to find a slot that is available to enqueue, without
// cross-thread synchronization
auto empty = [&]() -> std::optional<uint64_t> {
for (uint64_t i = 0; i < Capacity(); ++i) {
if (Get(current, i) == 0) {
return i;
}
}
return {};
}();
if (!empty.has_value()) {
// Rust does expect() which would panic:
// https://docs.rs/signal-hook/0.3.17/src/signal_hook/low_level/channel.rs.html#62
// If there's no empty place, then it would be up to the caller to deal
// with that
MOZ_CRASH("No empty slot available");
}
uint64_t modified = Set(current, *empty, aIndex);
// This is where the lock-free synchronization happens ; if `current`
// matches the content of `aSlotStatus`, then store `modified` in
// aSlotStatus and succeeds. Upon success it means no other thread has
// tried to change the same value at the same time, so the lock was safely
// acquired.
//
// Upon failure, it means another thread tried at the same time to use the
// same slot, so a new iteration of the loop needs to be executed to try
// another slot.
//
// In case of success (`aSlotStatus`'s content is equal to `current`), we
// require memory_order_release for the read-modify-write operation
// because we want to make sure when acquiring a slot that any concurrent
// thread performing a write had a chance to do it.
//
// In case of failure we require memory_order_relaxed for the load
// operation because we dont need synchronization at that point.
if (aSlotStatus.compare_exchange_weak(current, modified,
std::memory_order_release,
std::memory_order_relaxed)) {
if constexpr (MPSC_DEBUG) {
fprintf(stderr,
"[enqueue] modified=0x%" PRIx64 " => index=%" PRIu64 "\n",
modified, aIndex);
}
return;
}
} while (true);
}
/*
* Dequeue a value from the ring buffer.
*
* Takes the current value from the uint64_t atomic and read the current index
* out of it. If that index is 0 then we are facing a lack of slots and we
* return, the caller MUST check this and deal with the situation. If the
* index is non null we can try to acquire the matching slot in the ring
* buffer thanks to the compare/exchange loop. When the compare/exchange call
* succeeds, then the slot was acquired.
*
* @param aSlotStatus a uint64_t atomic that is used to perform lock-free
* thread exclusions
* */
[[nodiscard]] std::optional<uint64_t> UnmarkSlot(
std::atomic<uint64_t>& aSlotStatus) {
uint64_t current = aSlotStatus.load(std::memory_order_relaxed);
do {
uint64_t index = current & mMask;
if (index == 0) {
// Return a None
// https://docs.rs/signal-hook/0.3.17/src/signal_hook/low_level/channel.rs.html#77
// If we return None while dequeuing on mFree then we are full and the
// caller needs to deal with that.
return {};
}
uint64_t modified = current >> mBits;
// See the comment in MarkSlot for details
//
// In case of success (`aSlotStatus`'s content is equal to `current`), we
// require memory_order_acquire for the read-modify-write operation
// because we want to make sure when unmarking a slot that any concurrent
// thread performing a read will see the value we are writing.
//
// In case of failure we require memory_order_relaxed for the load
// operation because we don't need synchronization at that point.
if (aSlotStatus.compare_exchange_weak(current, modified,
std::memory_order_acquire,
std::memory_order_relaxed)) {
if constexpr (MPSC_DEBUG) {
fprintf(stderr,
"[dequeue] current=0x%" PRIx64 " => index=%" PRIu64 "\n",
current, index);
}
return index;
}
} while (true);
return {};
}
// Return the number of elements we can store within the ring buffer, whereas
// Capacity() will return the amount of elements in mData, including the 0
// value.
[[nodiscard]] constexpr size_t StorageCapacity() const {
return 1 + kCapacity;
}
// For the atomics below they are manipulated by Get()/Set(), and we are using
// them to store the IDs of the ring buffer usage (empty/full).
//
// We use mBits bits to store an ID (so we are limited to 16 and 0 is
// reserved) and append each of them to the atomics.
//
// A 0 value in one of those denotes we are full for the atomic, i.e.,
// mFree=0 means we are full and mOccupied=0 means we are empty.
// Holds the IDs of the free slots in the ring buffer
std::atomic<uint64_t> mFree;
// Holds the IDs of the occupied slots in the ring buffer
std::atomic<uint64_t> mOccupied;
// The actual ring buffer
std::unique_ptr<T[]> mData;
// How we are using the uint64_t atomic above to store the IDs of the ring
// buffer.
static constexpr uint64_t mBits = 4;
static constexpr uint64_t mMask = 0b1111;
};
/**
* Instantiation of the `MPSCRingBufferBase` type. This is safe to use from
* several producers threads and one one consumer (that never changes role),
* without explicit synchronization nor allocation (outside of the constructor).
*/
template <typename T, size_t Capacity>
using BoundedMPSCQueue = MPSCRingBufferBase<T, Capacity>;
} // namespace mozilla
#endif // mozilla_BoundedMPSCQueue_h
|