1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211
|
// Copyright (C) 2021-2025 Igor Baratta and Paul T. Kühner
//
// This file is part of DOLFINx (https://www.fenicsproject.org)
//
// SPDX-License-Identifier: LGPL-3.0-or-later
#pragma once
#include <algorithm>
#include <bit>
#include <cassert>
#include <concepts>
#include <cstdint>
#include <functional>
#include <iterator>
#include <limits>
#include <numeric>
#include <span>
#include <type_traits>
#include <utility>
#include <vector>
namespace dolfinx
{
struct __unsigned_projection
{
// Transforms the projected value to an unsigned int (if signed), while
// maintaining relative order by
// x ↦ x + |std::numeric_limits<I>::min()|
template <std::signed_integral T>
constexpr std::make_unsigned_t<T> operator()(T e) const noexcept
{
using uT = std::make_unsigned_t<T>;
// Assert binary structure for bit shift
static_assert(static_cast<uT>(std::numeric_limits<T>::min())
+ static_cast<uT>(std::numeric_limits<T>::max())
== static_cast<uT>(T(-1)));
static_assert(std::numeric_limits<uT>::digits
== std::numeric_limits<T>::digits + 1);
static_assert(std::bit_cast<uT>(std::numeric_limits<T>::min())
== (uT(1) << (sizeof(T) * 8 - 1)));
return std::bit_cast<uT>(std::forward<T>(e))
^ (uT(1) << (sizeof(T) * 8 - 1));
}
};
/// Projection from signed to signed int
inline constexpr __unsigned_projection unsigned_projection{};
struct __radix_sort
{
/// @brief Sort a range with radix sorting algorithm. The bucket size
/// is determined by the number of bits to sort at a time (2^BITS).
///
/// This allows usage with standard range containers of integral
/// types, for example
/// @code
/// std::array<std::int16_t, 3> a{2, 3, 1};
/// dolfixn::radix_sort(a); // a = {1, 2, 3}
/// @endcode
/// Additionally the projection based approach of the STL library is
/// adapted, which allows for versatile usage, for example the easy
/// realization of an argsort
/// @code
/// std::array<std::int16_t, 3> a{2, 3, 1};
/// std::array<std::int16_t, 3> i{0, 1, 2};
/// dolfinx::radix_sort(i, [&](auto i){ return a[i]; }); // yields i = {2, 0,
/// 1} and a[i] = {1, 2, 3};
/// @endcode
/// @tparam R Type of range to be sorted.
/// @tparam P Projection type to be applied on range elements to
/// produce a sorting index.
/// @tparam BITS The number of bits to sort at a time.
/// @param[in, out] range The range to sort.
/// @param[in] P Element projection.
template <std::ranges::random_access_range R, typename P = std::identity,
std::make_unsigned_t<std::remove_cvref_t<
std::invoke_result_t<P, std::iter_value_t<R>>>>
BITS
= 8>
requires std::integral<decltype(BITS)>
constexpr void operator()(R&& range, P proj = {}) const
{
// value type
using T = std::iter_value_t<R>;
// index type (if no projection is provided it holds I == T)
using I = std::remove_cvref_t<std::invoke_result_t<P, T>>;
using uI = std::make_unsigned_t<I>;
if constexpr (!std::is_same_v<uI, I>)
{
__radix_sort()(std::forward<R>(range), [&](const T& e) -> uI
{ return unsigned_projection(proj(e)); });
return;
}
if (range.size() <= 1)
return;
uI max_value = proj(*std::ranges::max_element(range, std::less{}, proj));
// Sort N bits at a time
constexpr uI bucket_size = 1 << BITS;
uI mask = (uI(1) << BITS) - 1;
// Compute number of iterations, most significant digit (N bits) of
// maxvalue
I its = 0;
// optimize for case where all first bits are set - then order will not
// depend on it
bool all_first_bit = std::ranges::all_of(
range, [&](const auto& e)
{ return proj(e) & (uI(1) << (sizeof(uI) * 8 - 1)); });
if (all_first_bit)
max_value = max_value & ~(uI(1) << (sizeof(uI) * 8 - 1));
while (max_value)
{
max_value >>= BITS;
its++;
}
// Adjacency list arrays for computing insertion position
std::array<I, bucket_size> counter;
std::array<I, bucket_size + 1> offset;
uI mask_offset = 0;
std::vector<T> buffer(range.size());
std::span<T> current_perm = range;
std::span<T> next_perm = buffer;
for (I i = 0; i < its; i++)
{
// Zero counter array
std::ranges::fill(counter, 0);
// Count number of elements per bucket
for (const auto& c : current_perm)
counter[(proj(c) & mask) >> mask_offset]++;
// Prefix sum to get the inserting position
offset[0] = 0;
std::partial_sum(counter.begin(), counter.end(),
std::next(offset.begin()));
for (const auto& c : current_perm)
{
uI bucket = (proj(c) & mask) >> mask_offset;
uI new_pos = offset[bucket + 1] - counter[bucket];
next_perm[new_pos] = c;
counter[bucket]--;
}
mask = mask << BITS;
mask_offset += BITS;
std::swap(current_perm, next_perm);
}
// Copy data back to array
if (its % 2 != 0)
std::ranges::copy(buffer, range.begin());
}
};
/// Radix sort
inline constexpr __radix_sort radix_sort{};
/// @brief Compute the permutation array that sorts a 2D array by row.
///
/// @param[in] x The flattened 2D array to compute the permutation array
/// for (row-major storage).
/// @param[in] shape1 The number of columns of `x`.
/// @return The permutation array such that `x[perm[i]] <= x[perm[i
/// +1]].
/// @pre `x.size()` must be a multiple of `shape1`.
/// @note This function is suitable for small values of `shape1`. Each
/// column of `x` is copied into an array that is then sorted.
template <typename T, int BITS = 16>
std::vector<std::int32_t> sort_by_perm(std::span<const T> x, std::size_t shape1)
{
static_assert(std::is_integral_v<T>, "Integral required.");
if (x.empty())
return std::vector<std::int32_t>{};
assert(shape1 > 0);
assert(x.size() % shape1 == 0);
const std::size_t shape0 = x.size() / shape1;
std::vector<std::int32_t> perm(shape0);
std::iota(perm.begin(), perm.end(), 0);
// Sort by each column, right to left. Col 0 has the most significant
// "digit".
std::vector<T> column(shape0);
for (std::size_t i = 0; i < shape1; ++i)
{
std::size_t col = shape1 - 1 - i;
for (std::size_t j = 0; j < shape0; ++j)
column[j] = x[j * shape1 + col];
radix_sort(perm, [&column](auto index) { return column[index]; });
}
return perm;
}
} // namespace dolfinx
|