File: precompute.h

package info (click to toggle)
fenics-basix 0.10.0.post0-2
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 3,156 kB
  • sloc: cpp: 23,435; python: 10,829; makefile: 43; sh: 26
file content (309 lines) | stat: -rw-r--r-- 10,705 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
// Copyright (c) 2020 Matthew Scroggs
// FEniCS Project
// SPDX-License-Identifier:    MIT

#pragma once

#include "math.h"
#include "mdspan.hpp"
#include <concepts>
#include <cstdint>
#include <span>
#include <tuple>
#include <type_traits>
#include <vector>

/// Matrix and permutation pre-computation
namespace basix::precompute
{
namespace impl
{
/// @private These structs are used to get the float/value type from a
/// template argument, including support for complex types.
template <typename T, typename = void>
struct scalar_value_type
{
  /// @internal
  typedef T value_type;
};
/// @private
template <typename T>
struct scalar_value_type<T, std::void_t<typename T::value_type>>
{
  typedef typename T::value_type value_type;
};
/// @private Convenience typedef
template <typename T>
using scalar_value_type_t = typename scalar_value_type<T>::value_type;
} // namespace impl

/// @brief Prepare a permutation.
///
/// Computes  representation of the permutation that allows the
/// permutation to be applied without any temporary memory assignment.
/// In pseudo code, this function does the following:
///
/// \code{.pseudo}
/// FOR index, entry IN perm:
///     new_index = entry
///     WHILE new_index < index:
///         new_index = perm[new_index]
///     OUT[index] = new_index
/// \endcode
///
/// Example
/// -------
/// Consider the permutation `P = [1, 4, 0, 5, 2, 3]`.
///
/// -# First, we look at the 0th entry. `P[0]` is 1. This is greater
/// than 0, so the 0th entry of the output is 1.
///
/// -# Next, we look at the 1st entry. `P[1]` is 4. This is greater than
///   1, so the 1st entry of the output is 4.
/// -# Next, we look at the 2nd entry. `P[2]` is 0. This is less than 2,
///   so we look at `P[0]`. `P[0]` is 1. This is less than 2, so we look at
///   `P[1]`. `P[1]` is 4. This is greater than 2, so the 2nd entry of the
///   output is 4.
/// -# Next, we look at the 3rd entry. `P[3]` is 5. This is greater than 3,
///   so the 3rd entry of the output is 5.
/// -# Next, we look at the 4th entry. `P[4]` is 2. This is less than 4, so
///   we look at `P[2]`. `P[2]` is 0. This is less than 4, so we look at
///  `P[0]`. `P[0]` is 1. This is less than 4, so we look at `P[1]`.
///  `P[1]` is 4. This is greater than (or equal to) 4, so the 4th entry
///  of the output is 4.
/// -# Next, we look at the 5th entry. `P[5]` is 3. This is less than 5,
///   so we look at `P[3]`. `P[3]` is 5. This is greater than (or equal
///   to) 5, so the 5th entry of the output is 5.
///
/// Hence, the output of this function in this case is `[1, 4, 4, 5, 4, 5]`.
///
/// For an example of how the permutation in this form is applied, see
/// apply_permutation().
///
/// @param[in,out] perm A permutation.
void prepare_permutation(std::span<std::size_t> perm);

/// @brief Apply a (precomputed) permutation \f$v = P u\f$.
///
/// This uses the representation returned by prepare_permutation() to
/// apply a permutation without needing any temporary memory. In pseudo
/// code, this function does the following:
///
/// \code{.pseudo}
/// FOR index, entry IN perm:
///     SWAP(data[index], data[entry])
/// \endcode
///
/// If `n` is set, this will apply the permutation to every block. The
/// `offset` is set, this will start applying the permutation at the
/// `offset`th block.
///
/// Example
/// -------
/// Consider the permutation `P = [1, 4, 0, 5, 2, 3]`. In the
/// documentation of prepare_permutation(), we saw that the precomputed
/// representation of this permutation is `P2 = [1, 4, 4, 5, 4, 5]`. In
/// this example, we look at how this representation can be used to
/// apply this permutation to the array `A = [a, b, c, d, e, f]`.
///
/// - `P2[0]` is 1, so we swap `A[0]` and `A[1]`. After this,
///   `A = [b, a, c, d, e, f]`.
///
/// - `P2[1]` is 4, so we swap `A[1]` and `A[4]`. After this, `A = [b, e, c, d, a, f]`.
///
/// - `P2[2]` is 4, so we swap `A[2]` and `A[4]`. After this, `A = [b, e, a, d, c, f]`.
///
/// - `P2[3]` is 5, so we swap `A[3]` and `A[5]`. After this, `A = [b,e,a,f,c,d]`.
///
/// - `P2[4]` is 4, so we swap `A[4]` and `A[4]`. This changes nothing.
///
/// - `P2[5]` is 5, so we swap `A[5]` and `A[5]`. This changes nothing.
///
/// Therefore the result of applying this permutation is `[b, e, a, f,
/// c, d]` (which is what we get if we apply the permutation directly).
///
/// @note This function is designed to be called at runtime, so its
/// performance is critical.
///
/// @param[in] perm A permutation in precomputed form (as returned by
/// prepare_permutation()).
/// @param[in,out] data The data to apply the permutation to. It has
/// shape `(m, n)` (uses row-major storage), where the permutation
/// matrix has shape `(m, m)`.
/// @param[in] offset The position in the data to start applying the
/// permutation.
/// @param[in] n The block size of the data.
template <typename E>
void apply_permutation(std::span<const std::size_t> perm, std::span<E> data,
                       std::size_t offset = 0, std::size_t n = 1)
{
  for (std::size_t i = 0; i < perm.size(); ++i)
    for (std::size_t b = 0; b < n; ++b)
      std::swap(data[n * (offset + i) + b], data[n * (offset + perm[i]) + b]);
}

/// @brief Permutation of mapped data.
template <typename E>
void apply_permutation_mapped(std::span<const std::size_t> perm,
                              std::span<E> data, std::span<const int> emap,
                              std::size_t n = 1)
{
  for (std::size_t i = 0; i < perm.size(); ++i)
    for (std::size_t b = 0; b < n; ++b)
      std::swap(data[n * emap[i] + b], data[n * emap[perm[i]] + b]);
}

/// @brief Apply a (precomputed) permutation to some transposed data.
///
/// Applies \f$v = u P^{T}\f$.
///
/// @note This function is designed to be called at runtime, so its
/// performance is critical.
///
/// See apply_permutation().
template <typename E>
void apply_inv_permutation_right(std::span<const std::size_t> perm,
                                 std::span<E> data, std::size_t offset = 0,
                                 std::size_t n = 1)
{
  const std::size_t dim = perm.size();
  const std::size_t data_size = (data.size() + (dim < n ? n - dim : 0)) / n;
  for (std::size_t b = 0; b < n; ++b)
  {
    for (std::size_t i = 0; i < dim; ++i)
    {
      std::swap(data[data_size * b + offset + i],
                data[data_size * b + offset + perm[i]]);
    }
  }
}

/// @brief Prepare a square matrix.
///
/// Computes the LU decomposition of the transpose of the matrix.
///
/// This function returns the permutation @f$P@f$P in the representation
/// @f$PA^t=LU@f$ (precomputed as in prepare_permutation()). The LU
/// decomposition of @f$A^t@f$ is computed in-place
///
/// For an example of how the permutation in this form is applied, see
/// apply_matrix().
///
/// @param[in,out] A The matrix data.
/// @return The three parts of a precomputed representation of the
/// matrix. These are (as described above):
/// - A permutation (precomputed as in prepare_permutation());
/// - the vector @f$D@f$;
template <std::floating_point T>
std::vector<std::size_t>
prepare_matrix(std::pair<std::vector<T>, std::array<std::size_t, 2>>& A)
{
  return math::transpose_lu<T>(A);
}

/// @brief Apply a (precomputed) matrix.
///
/// This uses the representation returned by prepare_matrix() to apply a
/// matrix without needing any temporary memory. In pseudo code, this
/// function does the following:
///
/// \code{.pseudo}
/// INPUT perm, mat, data
/// apply_permutation(perm, data)
/// FOR i IN RANGE(dim):
///     FOR j IN RANGE(i+1, dim):
///         data[i] += mat[i, j] * data[j]
/// FOR i IN RANGE(dim - 1, -1, -1):
///     data[i] *= M[i, i]
///     FOR j in RANGE(i):
///         data[i] += mat[i, j] * data[j]
/// \endcode
///
/// If `n` is set, this will apply the permutation to every block. The
/// `offset` is set, this will start applying the permutation at the
/// `offset`th block.
///
/// @note This function is designed to be called at runtime, so its
/// performance is critical.
///
/// @param[in] v_size_t A permutation, as computed by prepare_matrix().
/// @param[in] M The vector created by prepare_matrix().
/// @param[in,out] data The data to apply the permutation to
/// @param[in] offset The position in the data to start applying the
/// permutation
/// @param[in] n The block size of the data
template <typename T, typename E>
void apply_matrix(std::span<const std::size_t> v_size_t,
                  md::mdspan<const T, md::dextents<std::size_t, 2>> M,
                  std::span<E> data, std::size_t offset = 0, std::size_t n = 1)
{
  using U = typename impl::scalar_value_type_t<E>;

  const std::size_t dim = v_size_t.size();
  apply_permutation(v_size_t, data, offset, n);
  for (std::size_t b = 0; b < n; ++b)
  {
    for (std::size_t i = 0; i < dim; ++i)
    {
      for (std::size_t j = i + 1; j < dim; ++j)
      {
        data[n * (offset + i) + b]
            += static_cast<U>(M(i, j)) * data[n * (offset + j) + b];
      }
    }

    for (std::size_t i = 1; i <= dim; ++i)
    {
      data[n * (offset + dim - i) + b] *= static_cast<U>(M(dim - i, dim - i));
      for (std::size_t j = 0; j < dim - i; ++j)
      {
        data[n * (offset + dim - i) + b]
            += static_cast<U>(M(dim - i, j)) * data[n * (offset + j) + b];
      }
    }
  }
}

/// @brief Apply a (precomputed) matrix to some transposed data.
///
/// Computes \f$v^{T} = M u^{T}\f$ (or equivalently \f$v = u M^{T}\f$).
///
/// @note This function is designed to be called at runtime, so its
/// performance is critical.
///
/// See apply_matrix().
template <typename T, typename E>
void apply_tranpose_matrix_right(
    std::span<const std::size_t> v_size_t,
    md::mdspan<const T, md::dextents<std::size_t, 2>> M, std::span<E> data,
    std::size_t offset = 0, std::size_t n = 1)
{
  using U = typename impl::scalar_value_type_t<E>;

  const std::size_t dim = v_size_t.size();
  const std::size_t data_size = (data.size() + (dim < n ? n - dim : 0)) / n;
  apply_inv_permutation_right(v_size_t, data, offset, n);
  for (std::size_t b = 0; b < n; ++b)
  {
    for (std::size_t i = 0; i < dim; ++i)
    {
      for (std::size_t j = i + 1; j < dim; ++j)
      {
        data[data_size * b + offset + i]
            += static_cast<U>(M(i, j)) * data[data_size * b + offset + j];
      }
    }
    for (std::size_t i = 1; i <= dim; ++i)
    {
      data[data_size * b + offset + dim - i]
          *= static_cast<U>(M(dim - i, dim - i));
      for (std::size_t j = 0; j < dim - i; ++j)
      {
        data[data_size * b + offset + dim - i]
            += static_cast<U>(M(dim - i, j)) * data[data_size * b + offset + j];
      }
    }
  }
}

} // namespace basix::precompute