///////////////////////////////////////////////////////////////////////////////
// Copyright Christopher Kormanyos 2017 - 2024.
// Distributed under the Boost Software License,
// Version 1.0. (See accompanying file LICENSE_1_0.txt
// or copy at http://www.boost.org/LICENSE_1_0.txt)
//
#ifndef PARALLEL_FOR_2017_12_18_H // NOLINT(llvm-header-guard)
#define PARALLEL_FOR_2017_12_18_H
#include <algorithm>
#include <thread>
#include <vector>
namespace my_concurrency
{
// Call parallel_function(i) exactly once for every index i in the
// half-open range [start, end). The range is split into contiguous
// slices and each slice runs in its own std::thread. The call returns
// only after every launched thread has been joined.
//
// index_type:        an integral type. The element count is computed
//                    in unsigned long long.
// start, end:        bounds of the half-open index range. An empty or
//                    inverted range (end <= start) performs no calls
//                    and launches no threads.
// parallel_function: invoked as parallel_function(i). The same callable
//                    object is called concurrently from several threads,
//                    so it must be safe to use that way. The relative
//                    order of calls belonging to different slices is
//                    unspecified.
template<typename index_type,
         typename callable_function_type>
auto parallel_for(index_type start,
                  index_type end,
                  callable_function_type parallel_function) -> void
{
  // Nothing to do for an empty or inverted range.
  if(!(start < end))
  {
    return;
  }

  // Estimate the number of threads available. The hint can be zero
  // when the value is not computable; assume four threads in that case.
  const auto number_of_threads_hint =
    static_cast<unsigned>(std::thread::hardware_concurrency());

  const auto number_of_threads =
    static_cast<unsigned>((number_of_threads_hint == 0U) ? 4U : number_of_threads_hint);

  // Set the size of a slice for the range functions: (end - start + 1)
  // divided by the thread count, rounded to nearest, but at least one.
  // Pure integer arithmetic keeps this exact for large ranges and does
  // not depend on a floating-point conversion.
  using wide_unsigned_type = unsigned long long;

  const auto n =
    static_cast<wide_unsigned_type>(static_cast<wide_unsigned_type>(end - start) + 1U);

  const auto thread_count = static_cast<wide_unsigned_type>(number_of_threads);

  const auto n_per_thread =
    static_cast<wide_unsigned_type>
    (
      (n / thread_count) + ((((n % thread_count) * 2U) >= thread_count) ? 1U : 0U)
    );

  const auto slice =
    (std::max)(static_cast<index_type>(n_per_thread), static_cast<index_type>(1));

  // Inner loop, run by each thread on its own slice [index_lo, index_hi).
  // The callable is captured by reference. This is safe because all
  // threads are joined before this function returns.
  const auto launch_range =
    [&parallel_function](index_type index_lo, index_type index_hi)
    {
      for(auto i = index_lo; i < index_hi; ++i) // NOLINT(altera-id-dependent-backward-branch)
      {
        parallel_function(i);
      }
    };

  // Upper bound of the slice that begins at index_lo, clamped to end.
  // The remaining distance is compared first, so that no value beyond
  // end is ever formed (no wrap-around near the maximum of index_type).
  const auto slice_end =
    [end, slice](index_type index_lo) -> index_type
    {
      return (static_cast<index_type>(end - index_lo) > slice)
               ? static_cast<index_type>(index_lo + slice)
               : end;
    };

  // Create the thread pool and launch the jobs.
  std::vector<std::thread> pool { };

  pool.reserve(number_of_threads);

  auto i1 = start;

  // All threads except the last one receive exactly one slice each.
  // The counter is unsigned, so a narrow index_type cannot truncate it.
  for(auto i = 1U; ((i < number_of_threads) && (i1 < end)); ++i) // NOLINT(altera-id-dependent-backward-branch)
  {
    const auto i2 = slice_end(i1);

    pool.emplace_back(launch_range, i1, i2);

    i1 = i2;
  }

  // The last thread takes whatever remains of the range.
  if(i1 < end)
  {
    pool.emplace_back(launch_range, i1, end);
  }

  // Wait for the jobs to finish.
  for(auto& thread_in_pool : pool)
  {
    if(thread_in_pool.joinable())
    {
      thread_in_pool.join();
    }
  }
}
// Serial version, provided for easy comparison with a parallel run:
// calls sequential_function(i) for each index i in the half-open
// range [start, end), in ascending order, on the calling thread.
// An empty or inverted range (end <= start) performs no calls.
template<typename index_type,
         typename callable_function_type>
auto sequential_for(index_type start,
                    index_type end,
                    callable_function_type sequential_function) -> void
{
  index_type index = start;

  while(index < end)
  {
    sequential_function(index);

    ++index;
  }
}
} // namespace my_concurrency
#endif // PARALLEL_FOR_2017_12_18_H