File: Flatten.cpp

package info (click to toggle)
js8call 2.5.1%2Bds-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 24,720 kB
  • sloc: cpp: 562,655; sh: 898; python: 132; ansic: 102; makefile: 4
file content (232 lines) | stat: -rw-r--r-- 8,893 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
#include "Flatten.h"
#include <algorithm>
#include <array>
#include <cmath>
#include <memory>
#include <numbers>
#include <utility>
#include <vector>
#include <Eigen/Dense>

// This is an emulation, in spirit at least, of the effect of of the
// Fortran flat4() subroutine. While our implementation differs from
// that of the original, the results should be as good or better.
//
// One key difference other than what's obvious below is that this
// isn't responsible for converting a power-scaled spectrum to dB.
// If you need to do that before sending data here, that's on you;
// std::transform and std::log10 are going to be your friends. We
// do flattening, and only flattening.
//
// Note that this is a functor; it's serially reusable, but it's not
// reentrant. Call it from one thread only. In practical use, that's
// not expected to be a problem, and it allows us to reuse allocated
// memory in a serial manner, rather than requesting it and freeing
// it constantly.

/******************************************************************************/
// Flatten Constants
/******************************************************************************/

namespace {
// Tunable settings; degree of the polynomial used for the baseline
// curve fit, and the percentile of the span at which to sample. In
// general, a 5th degree polynomial and the 10th percentile should
// be optimal.

constexpr auto FLATTEN_DEGREE = 5;
constexpr auto FLATTEN_SAMPLE = 10;

// We're going to do a pairwise Estrin's evaluation of the polynomial
// coefficients, so it's critical that the degree of the polynomial is
// odd, resulting in an even number of coefficients.

static_assert(FLATTEN_DEGREE & 1, "Degree must be odd");
static_assert(FLATTEN_SAMPLE >= 0 && FLATTEN_SAMPLE <= 100,
              "Sample must be a percentage");

// Since we know the degree of the polynomial, and thus the number of
// nodes that we're going to use, we can do all the trigonometry work
// required to calculate the Chebyshev nodes in advance, by computing
// them over the range [0, 1]; we can then scale these at runtime to
// a span of any size by simple multiplication.
//
// Downside to this with C++17 is that std::cos() is not yet constexpr,
// as it is in C++23, so we must provide our own implementation until
// then.

constexpr auto FLATTEN_NODES = []() {
    // Full-range cosine function using symmetries of cos(x).

    constexpr auto cos = [](double x) {
        constexpr auto RAD_360 = std::numbers::pi * 2;
        constexpr auto RAD_180 = std::numbers::pi;
        constexpr auto RAD_90 = std::numbers::pi / 2;

        // Polynomial approximation of cos(x) for x in [0, RAD_90],
        // Accuracy here in theory is 1e-18, but double precision
        // itself is only 1-e16, so within the domain of doubles,
        // this should be extremely accurate.

        constexpr auto cos = [](double x) {
            constexpr std::array coefficients = {
                1.0,                           // Coefficient for x^0
                -0.49999999999999994,          // Coefficient for x^2
                0.041666666666666664,          // Coefficient for x^4
                -0.001388888888888889,         // Coefficient for x^6
                0.000024801587301587,          // Coefficient for x^8
                -0.00000027557319223986,       // Coefficient for x^10
                0.00000000208767569878681,     // Coefficient for x^12
                -0.00000000001147074513875176, // Coefficient for x^14
                0.0000000000000477947733238733 // Coefficient for x^16
            };

            auto const x2 = x * x;
            auto const x4 = x2 * x2;
            auto const x6 = x4 * x2;
            auto const x8 = x4 * x4;
            auto const x10 = x8 * x2;
            auto const x12 = x8 * x4;
            auto const x14 = x12 * x2;
            auto const x16 = x8 * x8;

            return coefficients[0] + coefficients[1] * x2 +
                   coefficients[2] * x4 + coefficients[3] * x6 +
                   coefficients[4] * x8 + coefficients[5] * x10 +
                   coefficients[6] * x12 + coefficients[7] * x14 +
                   coefficients[8] * x16;
        };

        // Reduce x to [0, RAD_360)

        x -= static_cast<long long>(x / RAD_360) * RAD_360;

        // Map x to [0, RAD_180]

        if (x > RAD_180)
            x = RAD_360 - x;

        // Map x to [0, RAD_90] and evaluate the polynomial;
        // flip the sign for angles in the second quadrant.

        return x > RAD_90 ? -cos(RAD_180 - x) : cos(x);
    };

    // Down to the actual business of generating Chebyshev nodes
    // suitable for scaling; once we move to C++20 as the minimum
    // compiler, we can remove the cos() function above and instead
    // call std::cos() here, as it's required to be constexpr in
    // C++20 and above, and presumably it'll be of high quality.

    auto nodes = std::array<double, FLATTEN_DEGREE + 1>{};
    constexpr auto slice = std::numbers::pi / (2.0 * nodes.size());

    for (std::size_t i = 0; i < nodes.size(); ++i) {
        nodes[i] = 0.5 * (1.0 - cos(slice * (2.0 * i + 1)));
    }

    return nodes;
}();
} // namespace

/******************************************************************************/
// Private Implementation
/******************************************************************************/

class Flatten::Impl {
    using Points = Eigen::Matrix<double, FLATTEN_NODES.size(), 2>;
    using Vandermonde =
        Eigen::Matrix<double, FLATTEN_NODES.size(), FLATTEN_NODES.size()>;
    using Coefficients = Eigen::Vector<double, FLATTEN_NODES.size()>;

    Points p;
    Vandermonde V;
    Coefficients c;

    // Polynomial evaluation using Estrin's method, loop is unrolled at
    // compile time. A compiler should emit SIMD instructions from what
    // it sees here when the optimizer is involved, but even without it,
    // we'll likely see fused multiply-add instructions.

    inline auto evaluate(float const x) const {
        return [this]<Eigen::Index... I>(
                   std::size_t const i,
                   std::integer_sequence<Eigen::Index, I...>) {
            auto baseline = 0.0;
            auto exponent = 1.0;

            ((baseline += (c[I * 2] + c[I * 2 + 1] * i) * exponent,
              exponent *= i * i),
             ...);

            return static_cast<float>(baseline);
        }(x, std::make_integer_sequence<Eigen::Index,
                                        Coefficients::SizeAtCompileTime / 2>{});
    }

  public:
    void operator()(float *const data, std::size_t const size) {
        // Loop invariants; sentinel one past the end of the range, and
        // the number of points in each of the arms on either side of a
        // node.

        auto const end = data + size;
        auto const arm = size / (2 * FLATTEN_NODES.size());

        // Collect lower envelope points; use Chebyshev node interpolants
        // to reduce Runge's phenomenon oscillations.

        for (std::size_t i = 0; i < FLATTEN_NODES.size(); ++i) {
            auto const node = size * FLATTEN_NODES[i];
            auto const base = data + static_cast<int>(std::round(node));
            auto span = std::vector<float>(std::clamp(base - arm, data, end),
                                           std::clamp(base + arm, data, end));

            auto const n = span.size() * FLATTEN_SAMPLE / 100;

            std::nth_element(span.begin(), span.begin() + n, span.end());

            p.row(i) << node, span[n];
        }

        // Extract x and y values from points and prepare the Vandermonde
        // matrix, initializing the first column with 1 (x^0); remaining
        // columns are filled with the Schur product.

        Eigen::VectorXd x = p.col(0);
        Eigen::VectorXd y = p.col(1);

        V.col(0).setOnes();
        for (Eigen::Index i = 1; i < V.cols(); ++i) {
            V.col(i) = V.col(i - 1).cwiseProduct(x);
        }

        // Solve the least squares problem for polynomial coefficients;
        // evaluate the polynomial and subtract the baseline.

        c = V.colPivHouseholderQr().solve(y);

        for (std::size_t i = 0; i < size; ++i)
            data[i] -= evaluate(i);
    }
};

/******************************************************************************/
// Public Implementation
/******************************************************************************/

Flatten::Flatten(bool const flatten)
    : m_impl(flatten ? std::make_unique<Impl>() : nullptr) {}

Flatten::~Flatten() = default;

void Flatten::operator()(bool const flatten) {
    m_impl.reset(flatten ? new Impl() : nullptr);
}

void Flatten::operator()(float *const data, std::size_t const size) {
    if (m_impl && size)
        (*m_impl)(data, size);
}

/******************************************************************************/