/**
* Copyright 2019-2024, XGBoost Contributors
*/
#include <dmlc/omp.h>  // for omp_in_parallel
#include <gtest/gtest.h>

#include <cstddef>  // for std::size_t
#include <thread>   // for std::thread::hardware_concurrency
#include <vector>   // for std::vector

#include "../../../src/common/threading_utils.h"  // BlockedSpace2d,ParallelFor2d,ParallelFor
#include "xgboost/context.h"                      // Context
namespace xgboost::common {
TEST(ParallelFor2d, CreateBlockedSpace2d) {
  constexpr size_t kDim1 = 5;
  constexpr size_t kDim2 = 3;
  constexpr size_t kGrainSize = 1;

  // Each of the kDim1 rows contributes kDim2 elements.  With a grain size of
  // 1 every element becomes its own unit block, so the flattened space must
  // contain exactly kDim1 * kDim2 blocks in row-major order.
  BlockedSpace2d space(
      kDim1, [&](size_t) { return kDim2; }, kGrainSize);
  ASSERT_EQ(kDim1 * kDim2, space.Size());

  // Walk the flattened block index and recover (row, col) from it; each
  // block's range must be the half-open unit interval [col, col + 1).
  for (size_t flat = 0; flat < kDim1 * kDim2; ++flat) {
    size_t const row = flat / kDim2;
    size_t const col = flat % kDim2;
    ASSERT_EQ(space.GetFirstDimension(flat), row);
    ASSERT_EQ(col, space.GetRange(flat).begin());
    ASSERT_EQ(col + kGrainSize, space.GetRange(flat).end());
  }
}
TEST(ParallelFor2d, Test) {
  constexpr size_t kDim1 = 100;
  constexpr size_t kDim2 = 15;
  constexpr size_t kGrainSize = 2;

  // Working space is a zero-initialized, row-major (kDim1 x kDim2) matrix.
  std::vector<int> matrix(kDim1 * kDim2, 0);
  BlockedSpace2d space(
      kDim1, [&](size_t) { return kDim2; }, kGrainSize);

  Context ctx;
  ctx.UpdateAllowUnknown(Args{{"nthread", "4"}});
  ASSERT_EQ(ctx.nthread, 4);

  // Each worker increments the cells of the sub-ranges it is handed; across
  // all threads every (i, j) cell must be visited exactly once.
  ParallelFor2d(space, ctx.Threads(), [&](size_t i, Range1d r) {
    for (auto j = r.begin(); j < r.end(); ++j) {
      matrix[i * kDim2 + j] += 1;
    }
  });

  // Any count other than 1 means a cell was skipped or processed twice.
  for (auto const& count : matrix) {
    ASSERT_EQ(count, 1);
  }
}
TEST(ParallelFor2d, NonUniform) {
  constexpr size_t kDim1 = 5;
  constexpr size_t kGrainSize = 256;

  // Deliberately skewed row lengths: ParallelFor2d should still split the
  // space into blocks of at most kGrainSize elements and distribute them in
  // a balanced manner across the workers.
  std::vector<size_t> dim2{1024, 500, 255, 5, 10000};
  BlockedSpace2d space(
      kDim1, [&](size_t i) { return dim2[i]; }, kGrainSize);

  // One zero-filled buffer per row, sized to that row's length.
  std::vector<std::vector<int>> working_space(kDim1);
  for (size_t row = 0; row < kDim1; ++row) {
    working_space[row].resize(dim2[row], 0);
  }

  Context ctx;
  ctx.UpdateAllowUnknown(Args{{"nthread", "4"}});
  ASSERT_EQ(ctx.nthread, 4);

  ParallelFor2d(space, ctx.Threads(), [&](size_t i, Range1d r) {
    for (auto j = r.begin(); j < r.end(); ++j) {
      working_space[i][j] += 1;
    }
  });

  // Every element of every row must have been touched exactly once.
  for (size_t row = 0; row < kDim1; ++row) {
    for (size_t col = 0; col < dim2[row]; ++col) {
      ASSERT_EQ(working_space[row][col], 1);
    }
  }
}
TEST(ParallelFor, Basic) {
  Context ctx;
  std::size_t n_items{16};
  auto n_threads = ctx.Threads();
  ParallelFor(n_items, n_threads, [&](auto i) {
    // Inside the worker the context reports a single thread — presumably
    // because nested parallelism is disabled within an active OMP region.
    ASSERT_EQ(ctx.Threads(), 1);
    if (n_threads > 1) {
      // With more than one worker we must actually be in a parallel region.
      ASSERT_TRUE(omp_in_parallel());
    }
    // Indices handed to the functor never exceed the requested count.
    ASSERT_LT(i, n_items);
  });
  // Back on the caller's thread the parallel region must be closed.
  ASSERT_FALSE(omp_in_parallel());
  // A single-threaded ParallelFor must not open a parallel region at all.
  ParallelFor(n_items, 1, [&](auto) { ASSERT_FALSE(omp_in_parallel()); });
}
TEST(OmpGetNumThreads, Max) {
#if defined(_OPENMP)
  auto const hw_limit = std::thread::hardware_concurrency();
  // An absurdly large request is capped; `<=` rather than `==` because a
  // container may expose fewer CPUs than the hardware reports.
  ASSERT_LE(OmpGetNumThreads(1 << 18), hw_limit);
  // A request of 0 falls back to a sane default in [1, hw_limit].
  auto n_threads = OmpGetNumThreads(0);
  ASSERT_GE(n_threads, 1);
  ASSERT_LE(n_threads, hw_limit);
#endif
}
} // namespace xgboost::common