File: sort.h

package info (click to toggle)
fenics-dolfinx 1%3A0.10.0.post4-1exp1
  • links: PTS, VCS
  • area: main
  • in suites: experimental
  • size: 6,028 kB
  • sloc: cpp: 36,535; python: 25,391; makefile: 226; sh: 171; xml: 55
file content (211 lines) | stat: -rw-r--r-- 6,569 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
// Copyright (C) 2021-2025 Igor Baratta and Paul T. Kühner
//
// This file is part of DOLFINx (https://www.fenicsproject.org)
//
// SPDX-License-Identifier:    LGPL-3.0-or-later

#pragma once

#include <algorithm>
#include <bit>
#include <cassert>
#include <concepts>
#include <cstdint>
#include <functional>
#include <iterator>
#include <limits>
#include <numeric>
#include <span>
#include <type_traits>
#include <utility>
#include <vector>

namespace dolfinx
{

struct __unsigned_projection
{
  // Transforms the projected value to an unsigned int (if signed), while
  // maintaining relative order by
  //    x ↦ x + |std::numeric_limits<I>::min()|
  template <std::signed_integral T>
  constexpr std::make_unsigned_t<T> operator()(T e) const noexcept
  {
    using uT = std::make_unsigned_t<T>;

    // Assert binary structure for bit shift
    static_assert(static_cast<uT>(std::numeric_limits<T>::min())
                      + static_cast<uT>(std::numeric_limits<T>::max())
                  == static_cast<uT>(T(-1)));
    static_assert(std::numeric_limits<uT>::digits
                  == std::numeric_limits<T>::digits + 1);
    static_assert(std::bit_cast<uT>(std::numeric_limits<T>::min())
                  == (uT(1) << (sizeof(T) * 8 - 1)));

    return std::bit_cast<uT>(std::forward<T>(e))
           ^ (uT(1) << (sizeof(T) * 8 - 1));
  }
};

/// Projection from signed to signed int
inline constexpr __unsigned_projection unsigned_projection{};

struct __radix_sort
{
  /// @brief Sort a range with radix sorting algorithm. The bucket size
  /// is determined by the number of bits to sort at a time (2^BITS).
  ///
  /// This allows usage with standard range containers of integral
  /// types, for example
  /// @code
  /// std::array<std::int16_t, 3> a{2, 3, 1};
  /// dolfixn::radix_sort(a); // a = {1, 2, 3}
  /// @endcode
  /// Additionally the projection based approach of the STL library is
  /// adapted, which allows for versatile usage, for example the easy
  /// realization of an argsort
  /// @code
  /// std::array<std::int16_t, 3> a{2, 3, 1};
  /// std::array<std::int16_t, 3> i{0, 1, 2};
  /// dolfinx::radix_sort(i, [&](auto i){ return a[i]; }); // yields i = {2, 0,
  /// 1} and a[i] = {1, 2, 3};
  /// @endcode
  /// @tparam R Type of range to be sorted.
  /// @tparam P Projection type to be applied on range elements to
  /// produce a sorting index.
  /// @tparam BITS The number of bits to sort at a time.
  /// @param[in, out] range The range to sort.
  /// @param[in] P Element projection.
  template <std::ranges::random_access_range R, typename P = std::identity,
            std::make_unsigned_t<std::remove_cvref_t<
                std::invoke_result_t<P, std::iter_value_t<R>>>>
                BITS
            = 8>
    requires std::integral<decltype(BITS)>
  constexpr void operator()(R&& range, P proj = {}) const
  {
    // value type
    using T = std::iter_value_t<R>;

    // index type (if no projection is provided it holds I == T)
    using I = std::remove_cvref_t<std::invoke_result_t<P, T>>;
    using uI = std::make_unsigned_t<I>;

    if constexpr (!std::is_same_v<uI, I>)
    {
      __radix_sort()(std::forward<R>(range), [&](const T& e) -> uI
                     { return unsigned_projection(proj(e)); });
      return;
    }

    if (range.size() <= 1)
      return;

    uI max_value = proj(*std::ranges::max_element(range, std::less{}, proj));

    // Sort N bits at a time
    constexpr uI bucket_size = 1 << BITS;
    uI mask = (uI(1) << BITS) - 1;

    // Compute number of iterations, most significant digit (N bits) of
    // maxvalue
    I its = 0;

    // optimize for case where all first bits are set - then order will not
    // depend on it
    bool all_first_bit = std::ranges::all_of(
        range, [&](const auto& e)
        { return proj(e) & (uI(1) << (sizeof(uI) * 8 - 1)); });
    if (all_first_bit)
      max_value = max_value & ~(uI(1) << (sizeof(uI) * 8 - 1));

    while (max_value)
    {
      max_value >>= BITS;
      its++;
    }

    // Adjacency list arrays for computing insertion position
    std::array<I, bucket_size> counter;
    std::array<I, bucket_size + 1> offset;

    uI mask_offset = 0;
    std::vector<T> buffer(range.size());
    std::span<T> current_perm = range;
    std::span<T> next_perm = buffer;
    for (I i = 0; i < its; i++)
    {
      // Zero counter array
      std::ranges::fill(counter, 0);

      // Count number of elements per bucket
      for (const auto& c : current_perm)
        counter[(proj(c) & mask) >> mask_offset]++;

      // Prefix sum to get the inserting position
      offset[0] = 0;
      std::partial_sum(counter.begin(), counter.end(),
                       std::next(offset.begin()));
      for (const auto& c : current_perm)
      {
        uI bucket = (proj(c) & mask) >> mask_offset;
        uI new_pos = offset[bucket + 1] - counter[bucket];
        next_perm[new_pos] = c;
        counter[bucket]--;
      }

      mask = mask << BITS;
      mask_offset += BITS;

      std::swap(current_perm, next_perm);
    }

    // Copy data back to array
    if (its % 2 != 0)
      std::ranges::copy(buffer, range.begin());
  }
};

/// Radix sort
inline constexpr __radix_sort radix_sort{};

/// @brief Compute the permutation array that sorts a 2D array by row.
///
/// @param[in] x The flattened 2D array to compute the permutation array
/// for (row-major storage).
/// @param[in] shape1 The number of columns of `x`.
/// @return The permutation array such that `x[perm[i]] <= x[perm[i
/// +1]].
/// @pre `x.size()` must be a multiple of `shape1`.
/// @note This function is suitable for small values of `shape1`. Each
/// column of `x` is copied into an array that is then sorted.
template <typename T, int BITS = 16>
std::vector<std::int32_t> sort_by_perm(std::span<const T> x, std::size_t shape1)
{
  static_assert(std::is_integral_v<T>, "Integral required.");

  if (x.empty())
    return std::vector<std::int32_t>{};

  assert(shape1 > 0);
  assert(x.size() % shape1 == 0);
  const std::size_t shape0 = x.size() / shape1;
  std::vector<std::int32_t> perm(shape0);
  std::iota(perm.begin(), perm.end(), 0);

  // Sort by each column, right to left. Col 0 has the most significant
  // "digit".
  std::vector<T> column(shape0);
  for (std::size_t i = 0; i < shape1; ++i)
  {
    std::size_t col = shape1 - 1 - i;
    for (std::size_t j = 0; j < shape0; ++j)
      column[j] = x[j * shape1 + col];

    radix_sort(perm, [&column](auto index) { return column[index]; });
  }

  return perm;
}

} // namespace dolfinx