File: collaborative_call_once.h

package info (click to toggle)
onetbb 2022.3.0-2
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 19,440 kB
  • sloc: cpp: 129,228; ansic: 9,745; python: 808; xml: 183; objc: 176; makefile: 66; sh: 66; awk: 41; javascript: 37
file content (256 lines) | stat: -rw-r--r-- 9,498 bytes parent folder | download | duplicates (4)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
/*
    Copyright (c) 2021-2024 Intel Corporation

    Licensed under the Apache License, Version 2.0 (the "License");
    you may not use this file except in compliance with the License.
    You may obtain a copy of the License at

        http://www.apache.org/licenses/LICENSE-2.0

    Unless required by applicable law or agreed to in writing, software
    distributed under the License is distributed on an "AS IS" BASIS,
    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    See the License for the specific language governing permissions and
    limitations under the License.
*/

#ifndef __TBB_collaborative_call_once_H
#define __TBB_collaborative_call_once_H

#include "task_arena.h"
#include "task_group.h"

#include <atomic>

namespace tbb {
namespace detail {
namespace d1 {

#if _MSC_VER && !defined(__INTEL_COMPILER)
    // Suppress warning: structure was padded due to alignment specifier
    #pragma warning (push)
    #pragma warning (disable: 4324)
#endif

// Task wrapper around a user functor that is referenced, not owned: both the functor
// and the wait_context are held by reference, so they must outlive this task.
// Whether the task is executed or cancelled, the wait_context is released exactly once.
template <typename F>
class collaborative_call_stack_task : public task {
public:
    // Binds the task to the functor `f` and to the wait context `wctx` that is released on completion.
    collaborative_call_stack_task(const F& f, wait_context& wctx)
        : m_func(f)
        , m_wait_ctx(wctx)
    {}

private:
    const F& m_func;
    wait_context& m_wait_ctx;

    // Hands the functor to d2::task_ptr_or_nullptr (presumably this invokes it and yields
    // either a follow-up task or nullptr - confirm against task_group.h), then releases
    // the wait context and returns whatever task pointer was produced.
    task* execute(d1::execution_data&) override {
        task* next_task = d2::task_ptr_or_nullptr(m_func);
        m_wait_ctx.release();
        return next_task;
    }

    // Cancellation path: the functor is not touched, but the wait context is still released.
    task* cancel(d1::execution_data&) override {
        m_wait_ctx.release();
        return nullptr;
    }
};

// collaborative_once_runner is declared alignas(max_nfs_size), and from_bits() asserts that
// the bits selected by the mask below are zero in a runner address. collaborative_once_flag
// adds and subtracts 1 on the combined value to count references in those low-order bits.
constexpr std::uintptr_t collaborative_once_max_references{ max_nfs_size };
// Selects the low-order (reference counting) bits; its complement recovers the runner address.
constexpr std::uintptr_t collaborative_once_references_mask{ collaborative_once_max_references - 1 };

// Helper object created by a thread entering collaborative_once_flag::do_collaborative_call_once.
// It holds the task_arena and wait_context in which the once-functor is run (run_once) and in
// which other threads can join the work (assist).
// The class is aligned to max_nfs_size so that the low-order bits of its address are zero
// (asserted in from_bits); the flag stores the address in an integer together with a small counter.
class alignas(max_nfs_size) collaborative_once_runner : no_copy {

    // State that exists only after run_once() has constructed it.
    struct storage_t {
        // Arena constructed with task_arena::attach (presumably attaches to the arena of the
        // constructing thread - confirm against task_arena.h).
        task_arena m_arena{ task_arena::attach{} };
        // Wait context constructed with an initial value of 1; collaborative_call_stack_task
        // releases it once when the functor has been executed or cancelled.
        wait_context m_wait_context{1};
    };

    // Number of live lifetime_guard objects; the destructor spins until it drops back to zero.
    std::atomic<std::int64_t> m_ref_count{0};
    // Set to true (release) from inside run_once(); assist() spins until it becomes true and
    // the destructor uses it to decide whether m_storage must be destroyed.
    std::atomic<bool> m_is_ready{false};

    // Storage with task_arena and wait_context must be initialized only by winner thread.
    // Wrapping it in an anonymous union keeps it unconstructed until run_once() placement-news it;
    // consequently it is also destroyed by hand in the destructor.
    union {
        storage_t m_storage;
    };

    // Runs f through r1::isolate_within_arena, passing the address of this runner as the
    // second argument (presumably the isolation tag - confirm against task_arena.h).
    // f is taken and captured by value.
    template<typename Fn>
    void isolated_execute(Fn f) {
        auto func = [f] {
            f();
            // delegate_base requires bool returning functor while isolate_within_arena ignores the result
            return true;
        };

        delegated_function<decltype(func)> delegate(func);

        r1::isolate_within_arena(delegate, reinterpret_cast<std::intptr_t>(this));
    }

public:
    // Scope guard that keeps a runner alive: increments m_ref_count on construction and
    // decrements it on destruction, which delays ~collaborative_once_runner() accordingly.
    class lifetime_guard : no_copy {
        collaborative_once_runner& m_runner;
    public:
        lifetime_guard(collaborative_once_runner& r) : m_runner(r) {
            m_runner.m_ref_count++;
        }
        ~lifetime_guard() {
            m_runner.m_ref_count--;
        }
    };

    // Intentionally leaves m_storage unconstructed; see run_once().
    collaborative_once_runner() {}

    // Blocks (spinning) until every lifetime_guard has been released, then destroys m_storage
    // only if run_once() got far enough to publish it via m_is_ready.
    ~collaborative_once_runner() {
        spin_wait_until_eq(m_ref_count, 0, std::memory_order_acquire);
        if (m_is_ready.load(std::memory_order_relaxed)) {
            m_storage.~storage_t();
        }
    }

    // Returns the address of this runner as an unsigned integer.
    std::uintptr_t to_bits() {
        return reinterpret_cast<std::uintptr_t>(this);
    }

    // Inverse of to_bits(). The caller must have cleared the low-order bits selected by
    // collaborative_once_references_mask; a value of 0 yields a null pointer.
    static collaborative_once_runner* from_bits(std::uintptr_t bits) {
        __TBB_ASSERT( (bits & collaborative_once_references_mask) == 0, "invalid pointer, last log2(max_nfs_size) bits must be zero" );
        return reinterpret_cast<collaborative_once_runner*>(bits);
    }

    // Winner-thread entry point: constructs m_storage, then runs f as a
    // collaborative_call_stack_task inside the arena and waits for it to complete.
    // Must be called at most once per runner (asserted via m_is_ready).
    template <typename F>
    void run_once(F&& f) {
        __TBB_ASSERT(!m_is_ready.load(std::memory_order_relaxed), "storage with task_arena and wait_context is already initialized");
        // Initialize internal state
        new(&m_storage) storage_t();
        m_storage.m_arena.execute([&] {
            isolated_execute([&] {
                task_group_context context{ task_group_context::bound,
                    task_group_context::default_traits | task_group_context::concurrent_wait };

                // The task only references f and the wait context; both outlive this lambda body.
                collaborative_call_stack_task<F> t{ std::forward<F>(f), m_storage.m_wait_context };

                // Set the ready flag after entering the execute body to prevent
                // moonlighting threads from occupying all slots inside the arena.
                m_is_ready.store(true, std::memory_order_release);
                execute_and_wait(t, context, m_storage.m_wait_context, context);
            });
        });
    }

    // Entry point for threads that did not win: waits until the winner has published the
    // storage, then enters the same arena and waits on the shared wait context.
    void assist() noexcept {
        // Do not join the arena until the winner thread takes the slot
        spin_wait_while_eq(m_is_ready, false);
        m_storage.m_arena.execute([&] {
            isolated_execute([&] {
                // We do not want to get an exception from user functor on moonlighting threads.
                // The exception is handled with the winner thread
                task_group_context stub_context;
                wait(m_storage.m_wait_context, stub_context);
            });
        });
    }

};

// Flag object passed to collaborative_call_once. Its single atomic word m_state holds either
// one of the small `state` enumerators or, while a call is in progress, the address of the
// winner's collaborative_once_runner with a transient reference counter in the low-order bits
// (those selected by collaborative_once_references_mask).
class collaborative_once_flag : no_copy {
    enum state : std::uintptr_t {
        // No thread is currently running the functor; the next caller may become the winner.
        uninitialized,
        // The functor has completed; collaborative_call_once returns without doing any work.
        done,
#if TBB_USE_ASSERT
        // Written by the destructor in assert-enabled builds to detect use after destruction.
        dead
#endif
    };
    std::atomic<std::uintptr_t> m_state{ state::uninitialized };

    template <typename Fn, typename... Args>
    friend void collaborative_call_once(collaborative_once_flag& flag, Fn&& f, Args&&... args);

    // Replaces the in-progress value `runner_bits` with `desired`.
    // Spins until m_state equals runner_bits exactly, i.e. until no transient references are
    // encoded in the low-order bits, and retries if the compare-exchange loses a race.
    void set_completion_state(std::uintptr_t runner_bits, std::uintptr_t desired) {
        std::uintptr_t expected = runner_bits;
        do {
            // compare_exchange_strong overwrites `expected` on failure, so reset it every round.
            expected = runner_bits;
            // Possible inefficiency: when we start waiting,
            // some moonlighting threads might continue coming that will prolong our waiting.
            // Fortunately, there are limited number of threads on the system so wait time is limited.
            spin_wait_until_eq(m_state, expected);
        } while (!m_state.compare_exchange_strong(expected, desired));
    }

    // Core of collaborative_call_once. The calling thread either becomes the winner, which
    // installs its own runner into m_state and executes f, or a moonlighting thread, which
    // joins the winner's runner through assist(). The loop repeats until the state is `done`
    // or this thread has run f itself.
    template <typename Fn>
    void do_collaborative_call_once(Fn&& f) {
        std::uintptr_t expected = m_state.load(std::memory_order_acquire);
        // Every caller creates a runner on its stack; it is only used if this thread wins.
        collaborative_once_runner runner;

        do {
            if (expected == state::uninitialized && m_state.compare_exchange_strong(expected, runner.to_bits())) {
                // Winner thread
                runner.run_once([&] {
                    try_call([&] {
                        std::forward<Fn>(f)();
                    }).on_exception([&] {
                        // Reset the state to uninitialized to allow other threads to try initialization again
                        set_completion_state(runner.to_bits(), state::uninitialized);
                    });
                    // We successfully executed functor
                    // NOTE(review): relies on the exception path above not falling through to
                    // this line - confirm against try_call/on_exception.
                    set_completion_state(runner.to_bits(), state::done);
                });
                break;
            } else {
                // Moonlighting thread: we need to add a reference to the state to prolong runner lifetime.
                // However, the maximum number of references are limited with runner alignment.
                // So, we use CAS loop and spin_wait to guarantee that references never exceed "max_value".
                do {
                    auto max_value = expected | collaborative_once_references_mask;
                    expected = spin_wait_while_eq(m_state, max_value);
                // "expected > state::done" prevents storing values, when state is uninitialized or done
                } while (expected > state::done && !m_state.compare_exchange_strong(expected, expected + 1));

                // Strip the reference bits to recover the runner address. If `expected` was
                // uninitialized or done the result is null and the block below is skipped.
                if (auto shared_runner = collaborative_once_runner::from_bits(expected & ~collaborative_once_references_mask)) {
                    // Hand the reference over from m_state to the runner's own counter:
                    // the guard is taken first, only then is the bit in m_state given back.
                    collaborative_once_runner::lifetime_guard guard{*shared_runner};
                    m_state.fetch_sub(1);

                    // The moonlighting threads are not expected to handle exceptions from user functor.
                    // Therefore, no exception is expected from assist().
                    shared_runner->assist();
                }
            }
            __TBB_ASSERT(m_state.load(std::memory_order_relaxed) != state::dead,
                         "collaborative_once_flag has been prematurely destroyed");
        } while (expected != state::done);
    }

#if TBB_USE_ASSERT
public:
    // Assert-enabled builds only: marks the flag as dead so later use trips the assertions.
    ~collaborative_once_flag() {
        m_state.store(state::dead, std::memory_order_relaxed);
    }
#endif
};


// Runs fn(args...) under the control of `flag`.
// If the flag is already in the `done` state the call returns immediately. Otherwise the
// work is delegated to collaborative_once_flag::do_collaborative_call_once, where the calling
// thread either executes the callable itself or joins the thread that is executing it.
//
// flag - flag shared by all callers that must agree on a single execution
// fn   - callable to invoke; forwarded with the value category supplied by the caller
// args - arguments forwarded to fn
//
// The callable and its arguments are captured by reference; this is safe because the wrapper
// lambda does not outlive this function call.
template <typename Fn, typename... Args>
void collaborative_call_once(collaborative_once_flag& flag, Fn&& fn, Args&&... args) {
    __TBB_ASSERT(flag.m_state.load(std::memory_order_relaxed) != collaborative_once_flag::dead,
                 "collaborative_once_flag has been prematurely destroyed");
    // Fast path: the acquire load pairs with the completion of a previous successful call.
    if (flag.m_state.load(std::memory_order_acquire) != collaborative_once_flag::done) {
    #if __TBB_GCC_PARAMETER_PACK_IN_LAMBDAS_BROKEN
        // Using stored_pack to suppress bug in GCC 4.8
        // with parameter pack expansion in lambda
        auto stored_pack = save_pack(std::forward<Args>(args)...);
        auto func = [&] { call(std::forward<Fn>(fn), std::move(stored_pack)); };
    #else
        // Forward the callable as well as its arguments so that both preprocessor branches
        // invoke it with the same value category. The wrapper is invoked at most once per
        // call of this function, so forwarding an rvalue callable is safe.
        auto func = [&] { std::forward<Fn>(fn)(std::forward<Args>(args)...); };
    #endif
        flag.do_collaborative_call_once(func);
    }
}

#if _MSC_VER && !defined(__INTEL_COMPILER)
    #pragma warning (pop) // 4324 warning
#endif

} // namespace d1
} // namespace detail

using detail::d1::collaborative_call_once;
using detail::d1::collaborative_once_flag;
} // namespace tbb

#endif // __TBB_collaborative_call_once_H