File: bitfloat.h

package info (click to toggle)
aoflagger 3.5.1-2
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 6,004 kB
  • sloc: cpp: 67,891; python: 497; sh: 242; makefile: 22
file content (303 lines) | stat: -rw-r--r-- 10,276 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
#ifndef SISCO_BIT_FLOAT_H_
#define SISCO_BIT_FLOAT_H_

#include <bit>
#include <cinttypes>
#include <limits>
#include <optional>
#include <stdexcept>
#include <string_view>

namespace casacore {

enum class BitFloatKind : uint8_t {
  Zero,
  NegativeZero,
  Normal,
  NaN,
  SignallingNaN,
  Infinity,
  NegativeInfinity,
  Subnormal
};

constexpr std::string_view ToString(BitFloatKind kind) {
  switch (kind) {
    case BitFloatKind::Zero:
      return "zero";
    case BitFloatKind::NegativeZero:
      return "negative zero";
    case BitFloatKind::Normal:
      return "normal";
    case BitFloatKind::NaN:
      return "nan";
    case BitFloatKind::SignallingNaN:
      return "signalling nan";
    case BitFloatKind::Infinity:
      return "infinity";
    case BitFloatKind::NegativeInfinity:
      return "negative infinity";
    case BitFloatKind::Subnormal:
      return "subnormal";
  }
  __builtin_unreachable();
}

/**
 * Class that understands the bit-representation of single-precision floating
 * point numbers, and that can decompose it into mantissa, exponent, sign and
 * special values. It can also do some (limited) mathematical operations on it,
 * while it can do these operations without loss of precision of the source
 * number. It is used to implement compression (Sisco) that is based on
 * predicting the next value in time and frequency.
 */
class BitFloat {
 public:
  /** Constructs a zero-value BitFloat. */
  constexpr BitFloat() : mantissa_(0), exponent_(-127), sign_(false) {}

  /** Constructs a BitFloat by decomposing the specified floating point value.
   */
  constexpr explicit BitFloat(float f)
      : mantissa_((std::bit_cast<uint32_t>(f) & 0x007FFFFF) | 0x00800000),
        exponent_(static_cast<int8_t>(
                      (std::bit_cast<uint32_t>(f) & 0x7F800000) >> 23) -
                  127),
        sign_(std::bit_cast<uint32_t>(f) & 0x80000000) {
    if (f == 0.0f) mantissa_ = 0;
  }

  constexpr BitFloat(uint32_t mantissa, int8_t exponent, bool sign)
      : mantissa_(mantissa), exponent_(exponent), sign_(sign) {}

  constexpr uint32_t Mantissa() const { return mantissa_; }

  constexpr int8_t Exponent() const { return exponent_; }

  /**
   * Combines the sign and the mantissa into one uint32_t. The mantissa of a
   * single-precision floating point values is only 23 bits, and so this would
   * fit easily. However, operations performed on the BitFloat like addition and
   * multiplication may enlarge the mantissa, causing an 'overflow'.
   *
   * This method checks if mantissa's 32th bit is set (which would conflict with
   * the bit used for the sign), and throws an exception in that case. This is
   * not a full check for the occurence of overflow, as the mantissa might have
   * overflown twice, and thus further overflow checking is necessary when doing
   * operations.
   */
  constexpr uint32_t PackMantissa() const {
    if (MantissaOverflow()) {
      throw std::overflow_error(
          "An overflow occured! Value = " + std::to_string(ToFloat()) +
          ", exponent = " + std::to_string(exponent_) + ", mantissa = " +
          std::to_string(mantissa_) + ", sign = " + std::to_string(sign_));
    }
    return (mantissa_ & 0x7FFFFFFFu) | (sign_ ? 0x80000000u : 0u);
  }

  /**
   * Given a result from @ref PackMantissa(), this function reversed the
   * packing.
   */
  constexpr static std::pair<uint32_t, bool> UnpackMantissa(
      uint32_t mantissa_with_sign) {
    const uint32_t mantissa = (mantissa_with_sign & 0x7FFFFFFFu);
    const bool sign = (mantissa_with_sign & 0x80000000u) != 0u;
    return {mantissa, sign};
  }

  /**
   * Constructs a BitFloat from a 'packed mantissa' (see @ref PackMantissa())
   * and the exponent. In Sisco, these are stored separately, and this method is
   * used to reconstruct the BitFloat.
   */
  constexpr static BitFloat FromCompressed(uint32_t mantissa_with_sign,
                                           int8_t exponent) {
    const std::pair<uint32_t, bool> unpacked =
        UnpackMantissa(mantissa_with_sign);
    return BitFloat(unpacked.first, exponent, unpacked.second);
  }

  constexpr bool Sign() const { return sign_; }

  /**
   * Adds the value @p rhs to this. this and @p rhs must be exponent-matched
   * beforehand. The function doesn't check this, neither does it check for
   * overflow or non-finite values.
   */
  constexpr BitFloat& operator+=(const BitFloat& rhs) {
    if (sign_ == rhs.sign_) {
      mantissa_ = mantissa_ + rhs.mantissa_;
    } else if (mantissa_ >= rhs.mantissa_) {
      mantissa_ = mantissa_ - rhs.mantissa_;
    } else {
      mantissa_ = rhs.mantissa_ - mantissa_;
      sign_ = !sign_;
    }
    return *this;
  }

  /**
   * Like operator+=, but subtracts @p rhs.
   */
  constexpr BitFloat& operator-=(const BitFloat& rhs) {
    if (sign_ != rhs.sign_) {
      mantissa_ = mantissa_ + rhs.mantissa_;
    } else if (mantissa_ >= rhs.mantissa_) {
      mantissa_ = mantissa_ - rhs.mantissa_;
    } else {
      mantissa_ = rhs.mantissa_ - mantissa_;
      sign_ = !sign_;
    }
    return *this;
  }

  /**
   * Multiplies the value by an integer factor. The exponent is not changed, and
   * overflow or special values aren't checked for.
   */
  constexpr BitFloat& operator*=(unsigned factor) {
    mantissa_ *= factor;
    return *this;
  }

  /**
   * Divides the value by an integer factor. The exponent is not changed, and
   * overflow or special values aren't checked for.
   */
  constexpr BitFloat& operator/=(unsigned factor) {
    mantissa_ /= factor;
    return *this;
  }

  /**
   * Compose this value back into a single-precision floating point value. Since
   * this class allows storing unnormalized floating point values, this function
   * normalizes the value if necessary. If the original value represented a
   * special value (like NaN or inf), the value returned will be the bit-wise
   * equal special value.
   */
  constexpr float ToFloat() const {
    int8_t exponent = exponent_;
    uint32_t result = mantissa_;
    // Exponent values of +128 and -127 have special meaning.
    // 128 will be wrapped.
    if (exponent_ != static_cast<int8_t>(128u) && exponent_ != -127) {
      if (mantissa_ == 0) return sign_ ? -0.0f : 0.0f;
      while (result & 0xFF000000) {
        ++exponent;
        result = (result >> 1);
      }
      while ((result & 0x00800000) == 0) {
        --exponent;
        result = (result << 1);
      }
    }
    // The double cast of the exponent is necessary to prevent sign extension.
    result =
        (result & 0x007FFFFF) |
        (static_cast<uint32_t>(static_cast<uint8_t>(exponent + 127)) << 23) |
        (sign_ ? 0x80000000 : 0x0);
    return std::bit_cast<float>(result);
  }

  explicit constexpr operator float() const { return ToFloat(); }

  /**
   * Based on the exponent, determines if this is a special value and should not
   * be used in mathematical operations like addition or multiplication. The
   * value 'zero' is also considered to not allow math.
   *
   * Sisco stores the exponents separately and based on the exponent determines
   * if it can do prediction to reconstruct the mantissa. Hence, the only
   * information available is the exponent.
   */
  static constexpr bool AllowsMath(int8_t exponent) {
    return exponent != static_cast<int8_t>(128u) && exponent != -127;
  }

  constexpr bool AllowsMath() const { return AllowsMath(exponent_); }

  /**
   * Negation; flips the sign of the value.
   */
  friend constexpr BitFloat operator-(const BitFloat& input) {
    return BitFloat(input.mantissa_, input.exponent_, !input.sign_);
  }

  friend constexpr bool operator==(const BitFloat& lhs, const BitFloat& rhs) {
    return lhs.mantissa_ == rhs.mantissa_ && lhs.exponent_ == rhs.exponent_ &&
           lhs.sign_ == rhs.sign_;
  }

  /**
   * Shifts the input value such that its exponent matches the specified
   * exponent. This should be used before operations like addition and
   * multiplication. Note that this may lead to loss of precision if the @p
   * value_exponent is larger than the input.
   *
   * In case the shift would lead to overflow, no value is returned. In case the
   * shifting results in the value becoming zero, a BitFloat representing zero
   * is returned, but with the 'unnormalized' exponent still set to the
   * requested exponent, meaning that it can be used in operations.
   */
  friend constexpr std::optional<BitFloat> Match(const BitFloat& input,
                                                 int8_t value_exponent) {
    if (input.Exponent() == value_exponent) {
      return input;
    } else if (input.Exponent() > value_exponent) {
      const uint8_t shift = input.Exponent() - value_exponent;
      if (shift > 7)
        return {};
      else
        return BitFloat(input.Mantissa() << shift, value_exponent,
                        input.Sign());
    } else {
      const uint8_t shift = value_exponent - input.Exponent();
      if (shift > 24)
        return BitFloat(0, value_exponent, input.Sign());
      else
        return BitFloat(input.Mantissa() >> shift, value_exponent,
                        input.Sign());
    }
  }

  /** Returns true if bit 32 is set. */
  constexpr bool MantissaOverflow() const { return mantissa_ & 0x80000000; }

  /** Determine what kind of float the specified value is: normal, nan, inf,
   * etc. */
  constexpr static BitFloatKind GetKind(float f) {
    const uint32_t value = std::bit_cast<uint32_t>(f);
    if (value == 0) {
      return BitFloatKind::Zero;
    } else if (value == 0x80000000) {
      return BitFloatKind::NegativeZero;
    } else if ((value & 0x7F800000) == 0x7F800000) {
      // exponent is 128
      if (value & 0x00400000)
        return BitFloatKind::NaN;
      else
        return BitFloatKind::SignallingNaN;
    } else if ((value & 0x7FFFFFFF) == 0b1111111100000000000000000000000) {
      if (value & 0x80000000)
        return BitFloatKind::NegativeInfinity;
      else
        return BitFloatKind::Infinity;
    } else if ((value & 0x7F800000) == 0) {
      return BitFloatKind::Subnormal;
    } else {
      return BitFloatKind::Normal;
    }
  }

 private:
  uint32_t mantissa_;
  int8_t exponent_;
  bool sign_;
};

}  // namespace casacore

#endif