// -*- C++ -*-
//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#ifndef _LIBCPP_BARRIER
#define _LIBCPP_BARRIER
/*
    barrier synopsis
namespace std
{
  template<class CompletionFunction = see below>
  class barrier
  {
  public:
    using arrival_token = see below;
    static constexpr ptrdiff_t max() noexcept;
    constexpr explicit barrier(ptrdiff_t phase_count,
                               CompletionFunction f = CompletionFunction());
    ~barrier();
    barrier(const barrier&) = delete;
    barrier& operator=(const barrier&) = delete;
    [[nodiscard]] arrival_token arrive(ptrdiff_t update = 1);
    void wait(arrival_token&& arrival) const;
    void arrive_and_wait();
    void arrive_and_drop();
  private:
    CompletionFunction completion; // exposition only
  };
}
*/
#include <__config>
#ifdef _LIBCPP_HAS_NO_THREADS
#  error "<barrier> is not supported since libc++ has been configured without support for threads."
#endif
#include <__assert> // all public C++ headers provide the assertion handler
#include <__atomic/atomic_base.h>
#include <__atomic/memory_order.h>
#include <__availability>
#include <__memory/unique_ptr.h>
#include <__thread/poll_with_backoff.h>
#include <__thread/timed_backoff_policy.h>
#include <__utility/move.h>
#include <cstddef>
#include <cstdint>
#include <limits>
#include <version>
#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
#  pragma GCC system_header
#endif
_LIBCPP_PUSH_MACROS
#include <__undef_macros>
#if _LIBCPP_STD_VER >= 14
_LIBCPP_BEGIN_NAMESPACE_STD
// Default completion function for barrier: a callable whose invocation does
// nothing. It is the default argument for barrier's _CompletionF parameter.
struct __empty_completion {
  // Defined in-class, hence implicitly inline; no explicit keyword required.
  _LIBCPP_HIDE_FROM_ABI void operator()() noexcept {
    // Intentionally empty: there is no completion step to run.
  }
};
#  ifndef _LIBCPP_HAS_NO_TREE_BARRIER
/*
The default implementation of __barrier_base is a classic tree barrier.
It looks different from literature pseudocode for two main reasons:
 1. Threads that call into std::barrier functions do not provide indices,
    so a numbering step is added before the actual barrier algorithm,
    appearing as an N+1 round to the N rounds of the tree barrier.
 2. A great deal of attention has been paid to avoid cache line thrashing
    by flattening the tree structure into cache-line sized arrays, that
    are indexed in an efficient way.
*/
// Phase counter type used by the tree-barrier front end (__barrier_base) and
// passed to the out-of-line arrival function declared below.
using __barrier_phase_t = uint8_t;
// Opaque algorithm state. It is only forward-declared in this header and is
// handled exclusively through the three functions that follow.
class __barrier_algorithm_base;
// Creates the algorithm state for the given participant count. The count is
// taken by non-const reference, so the callee is able to modify the caller's
// value. NOTE(review): whether it actually does so is decided by the
// out-of-line definition, which is not visible here -- confirm.
_LIBCPP_AVAILABILITY_SYNC _LIBCPP_EXPORTED_FROM_ABI __barrier_algorithm_base*
__construct_barrier_algorithm_base(ptrdiff_t& __expected);
// Registers one arrival for phase __old_phase. __barrier_base::arrive() treats
// a `true` result as "this arrival completed the phase" and then runs the
// completion function and advances the phase.
_LIBCPP_AVAILABILITY_SYNC _LIBCPP_EXPORTED_FROM_ABI bool
__arrive_barrier_algorithm_base(__barrier_algorithm_base* __barrier, __barrier_phase_t __old_phase);
// Counterpart of __construct_barrier_algorithm_base; __barrier_base installs
// it as the deleter of the unique_ptr that owns the algorithm state.
_LIBCPP_AVAILABILITY_SYNC _LIBCPP_EXPORTED_FROM_ABI void
__destroy_barrier_algorithm_base(__barrier_algorithm_base* __barrier);
// Front end of the tree barrier. Arrival bookkeeping is delegated to the
// out-of-line __barrier_algorithm_base functions; this class owns that state,
// runs the completion function, and publishes the phase that waiters poll.
template <class _CompletionF>
class __barrier_base {
  // Expected arrival count for the current phase. Declared before __base_
  // because members are initialized in declaration order and __base_'s
  // initializer passes this member by reference.
  ptrdiff_t __expected_;
  // Owning handle to the algorithm state, released through
  // __destroy_barrier_algorithm_base.
  unique_ptr<__barrier_algorithm_base, void (*)(__barrier_algorithm_base*)> __base_;
  // Pending change to __expected_, accumulated by arrive_and_drop() and folded
  // into __expected_ when a phase completes.
  __atomic_base<ptrdiff_t> __expected_adjustment_;
  // User-supplied callable invoked by the arrival that completes a phase.
  _CompletionF __completion_;
  // Current phase; waiters poll this until it differs from their token.
  __atomic_base<__barrier_phase_t> __phase_;
public:
  // Token returned by arrive(): the phase value observed on entry.
  using arrival_token = __barrier_phase_t;
  // Largest participant count this implementation reports as supported.
  static _LIBCPP_HIDE_FROM_ABI constexpr ptrdiff_t max() noexcept { return numeric_limits<ptrdiff_t>::max(); }
  // Stores the expected count, creates the algorithm state from it, and starts
  // at phase 0 with no pending adjustment.
  _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI
  __barrier_base(ptrdiff_t __expected, _CompletionF __completion = _CompletionF())
      : __expected_(__expected),
        // Passes the already-initialized member (not the parameter) by
        // reference to the out-of-line constructor function.
        __base_(std::__construct_barrier_algorithm_base(this->__expected_), &__destroy_barrier_algorithm_base),
        __expected_adjustment_(0),
        __completion_(std::move(__completion)),
        __phase_(0) {}
  // Registers __update arrivals, one call to the algorithm per arrival, and
  // returns the phase observed before any of them was registered.
  [[__nodiscard__]] _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI arrival_token arrive(ptrdiff_t __update) {
    _LIBCPP_ASSERT_ARGUMENT_WITHIN_DOMAIN(
        __update <= __expected_, "update is greater than the expected count for the current barrier phase");
    auto const __old_phase = __phase_.load(memory_order_relaxed);
    for (; __update; --__update)
      if (__arrive_barrier_algorithm_base(__base_.get(), __old_phase)) {
        // This arrival completed the phase: run the completion function, apply
        // and clear the adjustment queued by arrive_and_drop(), then publish
        // the new phase.
        __completion_();
        __expected_ += __expected_adjustment_.load(memory_order_relaxed);
        __expected_adjustment_.store(0, memory_order_relaxed);
        // The phase advances in steps of 2 and, being stored in a uint8_t,
        // wraps modulo 256. The reason for the stride of 2 is not visible in
        // this header. The release store is what the acquire load in wait()
        // observes.
        __phase_.store(__old_phase + 2, memory_order_release);
        __phase_.notify_all();
      }
    return __old_phase;
  }
  // Polls with a timed backoff policy until the phase differs from the token
  // returned by arrive().
  _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void wait(arrival_token&& __old_phase) const {
    // The lambda captures a copy of the token, so it does not depend on the
    // rvalue-reference parameter after this statement.
    auto const __test_fn = [this, __old_phase]() -> bool { return __phase_.load(memory_order_acquire) != __old_phase; };
    std::__libcpp_thread_poll_with_backoff(__test_fn, __libcpp_timed_backoff_policy());
  }
  // Queues a decrement of the expected count (applied when a phase completes,
  // see arrive()) and then arrives once, discarding the token.
  _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void arrive_and_drop() {
    __expected_adjustment_.fetch_sub(1, memory_order_relaxed);
    (void)arrive(1);
  }
};
#  else
/*
The alternative implementation of __barrier_base is a central barrier.
Two versions of this algorithm are provided:
 1. A fairly straightforward implementation of the literature for the
    general case where the completion function is not empty.
 2. An optimized implementation that exploits 2's complement arithmetic
    and well-defined overflow in atomic arithmetic, to handle the phase
    roll-over for free.
*/
// Central barrier, general case: a single countdown of outstanding arrivals
// plus a boolean phase flag that flips each time the countdown reaches zero.
template <class _CompletionF>
class __barrier_base {
  // Number of arrivals required per phase; lowered by arrive_and_drop().
  __atomic_base<ptrdiff_t> __expected;
  // Arrivals still outstanding in the current phase; counts down to zero.
  __atomic_base<ptrdiff_t> __arrived;
  // User-supplied callable invoked by the arrival that completes a phase.
  _CompletionF __completion;
  // Current phase; toggled on every phase completion.
  __atomic_base<bool> __phase;
public:
  // Token returned by arrive(): the phase flag observed on entry.
  using arrival_token = bool;
  // Largest participant count this implementation reports as supported.
  static constexpr ptrdiff_t max() noexcept { return numeric_limits<ptrdiff_t>::max(); }
  // Both counters start at the expected count. The parameters share their
  // names with the members; inside the mem-initializer parentheses the names
  // refer to the parameters.
  _LIBCPP_HIDE_FROM_ABI __barrier_base(ptrdiff_t __expected, _CompletionF __completion = _CompletionF())
      : __expected(__expected), __arrived(__expected), __completion(std::move(__completion)), __phase(false) {}
  // Registers `update` arrivals and returns the phase observed on entry.
  // NOTE(review): `update` and `new_expected` are not reserved (__ugly)
  // identifiers, unlike the other names in this header, so a user macro with
  // either name would break this code -- consider renaming.
  [[nodiscard]] _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI arrival_token arrive(ptrdiff_t update) {
    auto const __old_phase  = __phase.load(memory_order_relaxed);
    // fetch_sub yields the value before the subtraction; subtracting `update`
    // again gives the number of arrivals still outstanding after this call.
    auto const __result     = __arrived.fetch_sub(update, memory_order_acq_rel) - update;
    auto const new_expected = __expected.load(memory_order_relaxed);
    // The check runs after the counter has already been decremented.
    _LIBCPP_ASSERT_ARGUMENT_WITHIN_DOMAIN(
        update <= new_expected, "update is greater than the expected count for the current barrier phase");
    if (0 == __result) {
      // Last arrival of the phase: run the completion function, re-arm the
      // countdown for the next phase, then flip and publish the phase flag.
      __completion();
      __arrived.store(new_expected, memory_order_relaxed);
      __phase.store(!__old_phase, memory_order_release);
      __phase.notify_all();
    }
    return __old_phase;
  }
  // Waits on the phase flag, using the arrival token as the "old" value and
  // acquire ordering.
  _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void wait(arrival_token&& __old_phase) const {
    __phase.wait(__old_phase, memory_order_acquire);
  }
  // Lowers the expected count by one, then arrives once. arrive() reads
  // __expected after this decrement, so a phase completed by this call is
  // re-armed with the reduced count.
  _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void arrive_and_drop() {
    __expected.fetch_sub(1, memory_order_relaxed);
    (void)arrive(1);
  }
};
// Central barrier specialized for the empty completion function. Phase,
// arrival counter and expected counter are packed into one 64-bit atomic:
//   bit  63      phase
//   bits 32..62  arrived field
//   bits 0..31   expected field
// Both counter fields are initialized to (2^31 - count), see __init(), so
// registering arrivals counts the arrived field upwards and the carry out of
// bit 62 is what flips the phase bit.
template <>
class __barrier_base<__empty_completion> {
  // Value that adds 1 to the expected field.
  static constexpr uint64_t __expected_unit = 1ull;
  // Value that adds 1 to the arrived field.
  static constexpr uint64_t __arrived_unit  = 1ull << 32;
  // Selects the expected field (low 32 bits).
  static constexpr uint64_t __expected_mask = __arrived_unit - 1;
  // Selects the phase bit.
  static constexpr uint64_t __phase_bit     = 1ull << 63;
  // Selects the arrived field (bits 32..62); not referenced elsewhere in this
  // class.
  static constexpr uint64_t __arrived_mask  = (__phase_bit - 1) & ~__expected_mask;
  // The packed phase / arrived / expected word described above.
  __atomic_base<uint64_t> __phase_arrived_expected;
  // Builds the initial packed word: (2^31 - __count) in both the arrived and
  // the expected field, phase bit clear for __count > 0.
  static _LIBCPP_HIDE_FROM_ABI constexpr uint64_t __init(ptrdiff_t __count) _NOEXCEPT {
    return ((uint64_t(1u << 31) - __count) << 32) | (uint64_t(1u << 31) - __count);
  }
public:
  // Token returned by arrive(): the packed word masked down to the phase bit.
  using arrival_token = uint64_t;
  // Largest supported participant count: 2^31 - 1.
  static constexpr ptrdiff_t max() noexcept { return ptrdiff_t(1u << 31) - 1; }
  // Initializes the packed word for __count participants; the completion
  // argument is unnamed and ignored.
  _LIBCPP_HIDE_FROM_ABI explicit inline __barrier_base(ptrdiff_t __count, __empty_completion = __empty_completion())
      : __phase_arrived_expected(__init(__count)) {}
  // Registers `update` arrivals with a single fetch_add on the arrived field
  // and returns the phase bit observed before the addition.
  [[nodiscard]] inline _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI arrival_token arrive(ptrdiff_t update) {
    auto const __inc = __arrived_unit * update;
    auto const __old = __phase_arrived_expected.fetch_add(__inc, memory_order_acq_rel);
    // NOTE(review): __old is the whole packed word (phase, arrived and
    // expected fields), not an extracted count, and the comparison mixes a
    // signed and an unsigned operand -- confirm this check expresses the
    // intended precondition.
    _LIBCPP_ASSERT_ARGUMENT_WITHIN_DOMAIN(
        update <= __old, "update is greater than the expected count for the current barrier phase");
    // The phase bit differs between the old word and old + increment exactly
    // when this addition carried into bit 63, i.e. this call completed the
    // phase.
    if ((__old ^ (__old + __inc)) & __phase_bit) {
      // Re-arm for the next phase by adding the expected field (as read in
      // __old) into the arrived field, then wake the waiters.
      __phase_arrived_expected.fetch_add((__old & __expected_mask) << 32, memory_order_relaxed);
      __phase_arrived_expected.notify_all();
    }
    return __old & __phase_bit;
  }
  // Polls with a timed backoff policy until the phase bit of the packed word
  // differs from the token returned by arrive().
  inline _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void wait(arrival_token&& __phase) const {
    // NOTE(review): [=] copies __phase and implicitly captures `this`; that
    // implicit capture is deprecated in C++20. The tree-barrier wait() spells
    // its captures out explicitly.
    auto const __test_fn = [=]() -> bool {
      uint64_t const __current = __phase_arrived_expected.load(memory_order_acquire);
      return ((__current & __phase_bit) != __phase);
    };
    __libcpp_thread_poll_with_backoff(__test_fn, __libcpp_timed_backoff_policy());
  }
  // Adds one to the expected field -- which, given the (2^31 - count) encoding
  // set up by __init(), corresponds to one participant fewer -- and then
  // arrives once, discarding the token.
  inline _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void arrive_and_drop() {
    __phase_arrived_expected.fetch_add(__expected_unit, memory_order_relaxed);
    (void)arrive(1);
  }
};
#  endif // !_LIBCPP_HAS_NO_TREE_BARRIER
// Public barrier class template. Every operation forwards to a
// __barrier_base<_CompletionF> member; this layer adds the argument checks and
// removes copyability.
template <class _CompletionF = __empty_completion>
class barrier {
  // The implementation selected for _CompletionF (and by
  // _LIBCPP_HAS_NO_TREE_BARRIER).
  __barrier_base<_CompletionF> __b_;
public:
  // Token type is whatever the selected implementation uses.
  using arrival_token = typename __barrier_base<_CompletionF>::arrival_token;
  // Largest participant count supported by the selected implementation.
  static _LIBCPP_HIDE_FROM_ABI constexpr ptrdiff_t max() noexcept { return __barrier_base<_CompletionF>::max(); }
  // Constructs the implementation for __count participants and the given
  // completion function.
  // NOTE(review): both range checks sit in the constructor body, so they run
  // after __b_ has already been constructed from the unchecked __count.
  _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI explicit barrier(
      ptrdiff_t __count, _CompletionF __completion = _CompletionF())
      : __b_(__count, std::move(__completion)) {
    _LIBCPP_ASSERT_ARGUMENT_WITHIN_DOMAIN(
        __count >= 0,
        "barrier::barrier(ptrdiff_t, CompletionFunction): barrier cannot be initialized with a negative value");
    _LIBCPP_ASSERT_ARGUMENT_WITHIN_DOMAIN(
        __count <= max(),
        "barrier::barrier(ptrdiff_t, CompletionFunction): barrier cannot be initialized with "
        "a value greater than max()");
  }
  // Not copyable.
  barrier(barrier const&)            = delete;
  barrier& operator=(barrier const&) = delete;
  // Registers __update arrivals (default 1) and returns the token to pass to
  // wait(). The update count is asserted to be strictly positive.
  [[__nodiscard__]] _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI arrival_token arrive(ptrdiff_t __update = 1) {
    _LIBCPP_ASSERT_ARGUMENT_WITHIN_DOMAIN(__update > 0, "barrier:arrive must be called with a value greater than 0");
    return __b_.arrive(__update);
  }
  // Forwards the token to the implementation's wait().
  _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void wait(arrival_token&& __phase) const {
    __b_.wait(std::move(__phase));
  }
  // Arrives once and then waits on the token that arrival returned.
  _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void arrive_and_wait() { wait(arrive()); }
  // Forwards to the implementation's arrive_and_drop().
  _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void arrive_and_drop() { __b_.arrive_and_drop(); }
};
_LIBCPP_END_NAMESPACE_STD
#endif // _LIBCPP_STD_VER >= 14
_LIBCPP_POP_MACROS
#if !defined(_LIBCPP_REMOVE_TRANSITIVE_INCLUDES) && _LIBCPP_STD_VER <= 20
#  include <atomic>
#  include <concepts>
#  include <iterator>
#  include <memory>
#  include <stdexcept>
#  include <variant>
#endif
#endif //_LIBCPP_BARRIER