File: FEnvImpl.h

package info (click to toggle)
llvm-toolchain-13 1%3A13.0.1-11
  • links: PTS, VCS
  • area: main
  • in suites: bookworm
  • size: 1,418,840 kB
  • sloc: cpp: 5,290,826; ansic: 996,570; asm: 544,593; python: 188,212; objc: 72,027; lisp: 30,291; f90: 25,395; sh: 24,898; javascript: 9,780; pascal: 9,398; perl: 7,484; ml: 5,432; awk: 3,523; makefile: 2,913; xml: 953; cs: 573; fortran: 539
file content (389 lines) | stat: -rw-r--r-- 13,521 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
//===-- x86_64 floating point env manipulation functions --------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIBC_UTILS_FPUTIL_X86_64_FENVIMPL_H
#define LLVM_LIBC_UTILS_FPUTIL_X86_64_FENVIMPL_H

#include <fenv.h>
#include <stdint.h>

#include "src/__support/sanitizer.h"

namespace __llvm_libc {
namespace fputil {

namespace internal {

// Normally, one should be able to define FE_* macros to the exact rounding mode
// encodings. However, since we want LLVM libc to be compiled against headers
// from other libcs, we cannot assume that FE_* macros are always defined in
// such a manner. So, we will define enums corresponding to the x86_64 bit
// encodings. The implementations can map from FE_* to the corresponding enum
// values.

// Both the x87 control word and the MXCSR register encode the rounding mode
// with the same 2-bit value; only the position of that 2-bit field inside the
// register differs. The bit positions are defined right after this struct.
struct RoundingControlValue {
  static constexpr uint16_t ToNearest = 0;
  static constexpr uint16_t Downward = 1;
  static constexpr uint16_t Upward = 2;
  static constexpr uint16_t TowardZero = 3;
};

// Bit offset of the 2-bit rounding control field inside the x87 control word
// and inside MXCSR, respectively. getRound and setRound shift by these
// amounts to extract or place a RoundingControlValue.
static constexpr uint16_t X87RoundingControlBitPosition = 10;
static constexpr uint16_t MXCSRRoundingControlBitPosition = 13;

// Exception status flags. The x87 status register and MXCSR use the same
// encoding and the same bit positions for them, so a single set of constants
// serves both. Each flag is one bit, written below as a shift to make the bit
// index explicit.
struct ExceptionFlags {
  static constexpr uint16_t Invalid = 1 << 0;
  // Some libcs define __FE_DENORM corresponding to the denormal input
  // exception and include it in FE_ALL_EXCEPTS. It is defined and used here
  // to support compiling against headers provided by such libcs.
  static constexpr uint16_t Denormal = 1 << 1;
  static constexpr uint16_t DivByZero = 1 << 2;
  static constexpr uint16_t Overflow = 1 << 3;
  static constexpr uint16_t Underflow = 1 << 4;
  static constexpr uint16_t Inexact = 1 << 5;
};

// Each of the six exceptions has one control (mask) bit. The six bits are
// contiguous: they are the first 6 bits (0 to 5) of the x87 control word and
// bits 7 to 12 of the MXCSR register. The constants below give the position
// of the lowest of those bits in each register.
// NOTE: "Contol" and "Poistion" in the MXCSR constant's name are
// misspellings; the name is referenced elsewhere in this file, so it is kept.
static constexpr uint16_t X87ExceptionControlBitPosition = 0;
static constexpr uint16_t MXCSRExceptionContolBitPoistion = 7;

// Translates a set of FE_* exception macros, OR-ed together in |excepts|,
// into the hardware encoding given by ExceptionFlags. Every exception is a
// distinct single bit in the registers, so the result is accumulated one bit
// per requested exception.
static inline uint16_t getStatusValueForExcept(int excepts) {
  uint16_t flags = 0;
  if (excepts & FE_INVALID)
    flags |= ExceptionFlags::Invalid;
#ifdef __FE_DENORM
  if (excepts & __FE_DENORM)
    flags |= ExceptionFlags::Denormal;
#endif // __FE_DENORM
  if (excepts & FE_DIVBYZERO)
    flags |= ExceptionFlags::DivByZero;
  if (excepts & FE_OVERFLOW)
    flags |= ExceptionFlags::Overflow;
  if (excepts & FE_UNDERFLOW)
    flags |= ExceptionFlags::Underflow;
  if (excepts & FE_INEXACT)
    flags |= ExceptionFlags::Inexact;
  return flags;
}

// Inverse of getStatusValueForExcept: converts hardware exception bits (in
// the ExceptionFlags encoding) into the OR of the matching FE_* macros from
// the fenv.h this file is compiled against.
static inline int exceptionStatusToMacro(uint16_t status) {
  int macros = 0;
  if (status & ExceptionFlags::Invalid)
    macros |= FE_INVALID;
#ifdef __FE_DENORM
  if (status & ExceptionFlags::Denormal)
    macros |= __FE_DENORM;
#endif // __FE_DENORM
  if (status & ExceptionFlags::DivByZero)
    macros |= FE_DIVBYZERO;
  if (status & ExceptionFlags::Overflow)
    macros |= FE_OVERFLOW;
  if (status & ExceptionFlags::Underflow)
    macros |= FE_UNDERFLOW;
  if (status & ExceptionFlags::Inexact)
    macros |= FE_INEXACT;
  return macros;
}

// In-memory image of the x87 environment as used by this file: it is the
// memory operand handed to `fnstenv` (getX87StateDescriptor) and `fldenv`
// (writeX87StateDescriptor). The struct is 28 bytes: four 16-bit slots
// followed by 20 bytes that are not broken down yet.
struct X87StateDescriptor {
  // x87 control word (exception mask bits and rounding control live here).
  uint16_t ControlWord;
  // 16 bits following the control word; not interpreted by this file.
  uint16_t Unused1;
  // x87 status word (exception flags live in its low bits).
  uint16_t StatusWord;
  // 16 bits following the status word; not interpreted by this file.
  uint16_t Unused2;
  // TODO: Elaborate the remaining 20 bytes as required.
  uint32_t _[5];
};

// Returns the current x87 control word, stored to a local with the `fnstcw`
// instruction.
static inline uint16_t getX87ControlWord() {
  uint16_t w;
  __asm__ __volatile__("fnstcw %0" : "=m"(w)::);
  // |w| is written only by the inline asm. SANITIZER_MEMORY_INITIALIZED comes
  // from src/__support/sanitizer.h; presumably it marks the bytes as
  // initialized for sanitizer builds -- confirm against that header.
  SANITIZER_MEMORY_INITIALIZED(&w, sizeof(w));
  return w;
}

// Loads |w| into the x87 control word using the `fldcw` instruction.
static inline void writeX87ControlWord(uint16_t w) {
  __asm__ __volatile__("fldcw %0" : : "m"(w) :);
}

// Returns the current x87 status word, stored to a local with the `fnstsw`
// instruction.
static inline uint16_t getX87StatusWord() {
  uint16_t w;
  __asm__ __volatile__("fnstsw %0" : "=m"(w)::);
  // |w| is written only by the inline asm. SANITIZER_MEMORY_INITIALIZED comes
  // from src/__support/sanitizer.h; presumably it marks the bytes as
  // initialized for sanitizer builds -- confirm against that header.
  SANITIZER_MEMORY_INITIALIZED(&w, sizeof(w));
  return w;
}

// Issues the `fnclex` instruction to clear the x87 exception state. Takes no
// arguments and returns nothing.
static inline void clearX87Exceptions() {
  __asm__ __volatile__("fnclex" : : :);
}

// Returns the current value of the MXCSR register, stored to a local with the
// `stmxcsr` instruction.
static inline uint32_t getMXCSR() {
  uint32_t w;
  __asm__ __volatile__("stmxcsr %0" : "=m"(w)::);
  // |w| is written only by the inline asm. SANITIZER_MEMORY_INITIALIZED comes
  // from src/__support/sanitizer.h; presumably it marks the bytes as
  // initialized for sanitizer builds -- confirm against that header.
  SANITIZER_MEMORY_INITIALIZED(&w, sizeof(w));
  return w;
}

// Loads |w| into the MXCSR register using the `ldmxcsr` instruction.
static inline void writeMXCSR(uint32_t w) {
  __asm__ __volatile__("ldmxcsr %0" : : "m"(w) :);
}

// Stores the current x87 environment into |s| using the `fnstenv`
// instruction.
// NOTE(review): per the Intel SDM, `fnstenv` also masks all x87 floating
// point exceptions after storing the environment. A caller that needs the
// exception masks preserved must reload the environment afterwards, for
// example with writeX87StateDescriptor.
static inline void getX87StateDescriptor(X87StateDescriptor &s) {
  __asm__ __volatile__("fnstenv %0" : "=m"(s));
  // |s| is written only by the inline asm. SANITIZER_MEMORY_INITIALIZED comes
  // from src/__support/sanitizer.h; presumably it marks the bytes as
  // initialized for sanitizer builds -- confirm against that header.
  SANITIZER_MEMORY_INITIALIZED(&s, sizeof(s));
}

// Loads the x87 environment from |s| using the `fldenv` instruction. Every
// field of |s| is handed to the instruction, so |s| should be fully
// populated (typically by getX87StateDescriptor) before the call.
static inline void writeX87StateDescriptor(const X87StateDescriptor &s) {
  __asm__ __volatile__("fldenv %0" : : "m"(s) :);
}

// Issues the `fwait` instruction. raiseExcept calls this after setting a
// flag in the x87 status word to get the processor to raise the exception.
static inline void fwait() { __asm__ __volatile__("fwait"); }

} // namespace internal

// Unmasks (enables trapping for) the exceptions in |excepts| in both the x87
// control word and MXCSR. Returns, as FE_* macros, the exceptions that were
// enabled in the x87 control word before the call.
static inline int enableExcept(int excepts) {
  // The control bits in the x87 control word and in MXCSR are mask bits: a
  // set bit blocks the exception. Enabling an exception therefore means
  // clearing its bit, which is why the selection is inverted below.
  const uint16_t toUnmask = internal::getStatusValueForExcept(excepts);

  uint16_t controlWord = internal::getX87ControlWord();
  // Exceptions enabled before this call are the cleared bits among the low
  // six bits of the control word.
  const uint16_t previouslyEnabled =
      static_cast<uint16_t>(~controlWord & 0x3F);
  controlWord = static_cast<uint16_t>(controlWord & ~toUnmask);
  internal::writeX87ControlWord(controlWord);

  // SSE exceptions are independent of the x87 ones, so updating MXCSR may not
  // be of much practical use, but the two are kept in agreement anyway.
  uint32_t sseControl = internal::getMXCSR();
  sseControl &= ~(toUnmask << internal::MXCSRExceptionContolBitPoistion);
  internal::writeMXCSR(sseControl);

  // Because the x87 and SSE exception masks are independent of each other,
  // reporting both in one return value does not make much sense. Standard
  // floating point functions mostly deal with FPU operations, so only the
  // previous x87 state is returned.
  return internal::exceptionStatusToMacro(previouslyEnabled);
}

// Masks (disables trapping for) the exceptions in |excepts| in both the x87
// control word and MXCSR. Returns, as FE_* macros, the exceptions that were
// enabled in the x87 control word before the call.
static inline int disableExcept(int excepts) {
  // A set control bit blocks the corresponding exception, in the x87 control
  // word as well as in MXCSR, so disabling means setting the bits.
  const uint16_t toMask = internal::getStatusValueForExcept(excepts);

  uint16_t controlWord = internal::getX87ControlWord();
  // Exceptions enabled before this call are the cleared bits among the low
  // six bits of the control word.
  const uint16_t previouslyEnabled =
      static_cast<uint16_t>(~controlWord & 0x3F);
  controlWord = static_cast<uint16_t>(controlWord | toMask);
  internal::writeX87ControlWord(controlWord);

  // As in enableExcept, it is not clear that touching the SSE masks is
  // required; it is done only as a "nice thing to do".
  uint32_t sseControl = internal::getMXCSR();
  sseControl |= (toMask << internal::MXCSRExceptionContolBitPoistion);
  internal::writeMXCSR(sseControl);

  return internal::exceptionStatusToMacro(previouslyEnabled);
}

static inline int clearExcept(int excepts) {
  internal::X87StateDescriptor state;
  internal::getX87StateDescriptor(state);
  state.StatusWord &= ~internal::getStatusValueForExcept(excepts);
  internal::writeX87StateDescriptor(state);

  uint32_t mxcsr = internal::getMXCSR();
  mxcsr &= ~internal::getStatusValueForExcept(excepts);
  internal::writeMXCSR(mxcsr);
  return 0;
}

// Returns, as FE_* macros, the subset of |excepts| whose flag is currently
// set in the x87 status word or in MXCSR.
static inline int testExcept(int excepts) {
  const uint16_t wanted = internal::getStatusValueForExcept(excepts);
  // An exception counts as raised if either unit reports it.
  const uint16_t raisedX87 =
      static_cast<uint16_t>(wanted & internal::getX87StatusWord());
  const uint32_t raisedSSE = wanted & internal::getMXCSR();
  return internal::exceptionStatusToMacro(
      static_cast<uint16_t>(raisedX87 | raisedSSE));
}

// Sets the exception flags but does not trigger the exception handler.
static inline int setExcept(int excepts) {
  uint16_t statusValue = internal::getStatusValueForExcept(excepts);
  internal::X87StateDescriptor state;
  internal::getX87StateDescriptor(state);
  state.StatusWord |= statusValue;
  internal::writeX87StateDescriptor(state);

  uint32_t mxcsr = internal::getMXCSR();
  mxcsr |= statusValue;
  internal::writeMXCSR(mxcsr);

  return 0;
}

static inline int raiseExcept(int excepts) {
  uint16_t statusValue = internal::getStatusValueForExcept(excepts);

  // We set the status flag for exception one at a time and call the
  // fwait instruction to actually get the processor to raise the
  // exception by calling the exception handler. This scheme is per
  // the description in in "8.6 X87 FPU EXCEPTION SYNCHRONIZATION"
  // of the "Intel 64 and IA-32 Architectures Software Developer's
  // Manual, Vol 1".

  // FPU status word is read for each exception seperately as the
  // exception handler can potentially write to it (typically to clear
  // the corresponding exception flag). By reading it separately, we
  // ensure that the writes by the exception handler are maintained
  // when raising the next exception.

  auto raiseHelper = [](uint16_t  singleExceptFlag) {
    internal::X87StateDescriptor state;
    internal::getX87StateDescriptor(state);
    state.StatusWord |= singleExceptFlag;
    internal::writeX87StateDescriptor(state);
    internal::fwait();
  };

  if (statusValue & internal::ExceptionFlags::Invalid)
    raiseHelper(internal::ExceptionFlags::Invalid);
  if (statusValue & internal::ExceptionFlags::DivByZero)
    raiseHelper(internal::ExceptionFlags::DivByZero);
  if (statusValue & internal::ExceptionFlags::Overflow)
    raiseHelper(internal::ExceptionFlags::Overflow);
  if (statusValue & internal::ExceptionFlags::Underflow)
    raiseHelper(internal::ExceptionFlags::Underflow);
  if (statusValue & internal::ExceptionFlags::Inexact)
    raiseHelper(internal::ExceptionFlags::Inexact);
#ifdef __FE_DENORM
  if (statusValue & internal::ExceptionFlags::Denormal) {
    raiseHelper(internal::ExceptionFlags::Denormal);
  }
#endif // __FE_DENORM

  // There is no special synchronization scheme available to
  // raise SEE exceptions. So, we will ignore that for now.
  // Just plain writing to the MXCSR register does not guarantee
  // the exception handler will be called.

  return 0;
}

// Returns the current rounding mode as an FE_* macro, decoded from the
// rounding control field of MXCSR. Returns -1 if the field holds a value
// that matches none of the known encodings.
static inline int getRound() {
  const uint32_t mxcsr = internal::getMXCSR();
  const uint16_t field = static_cast<uint16_t>(
      (mxcsr >> internal::MXCSRRoundingControlBitPosition) & 0x3);

  if (field == internal::RoundingControlValue::ToNearest)
    return FE_TONEAREST;
  if (field == internal::RoundingControlValue::Downward)
    return FE_DOWNWARD;
  if (field == internal::RoundingControlValue::Upward)
    return FE_UPWARD;
  if (field == internal::RoundingControlValue::TowardZero)
    return FE_TOWARDZERO;
  return -1; // Error value.
}

// Sets the rounding mode of both the x87 unit and the SSE unit to |mode|,
// which must be one of FE_TONEAREST, FE_DOWNWARD, FE_UPWARD or FE_TOWARDZERO.
// Returns 0 on success. For any other |mode|, returns 1 without touching
// either register.
static inline int setRound(int mode) {
  // Map the FE_* macro to the 2-bit hardware encoding.
  uint16_t encoding;
  if (mode == FE_TONEAREST)
    encoding = internal::RoundingControlValue::ToNearest;
  else if (mode == FE_DOWNWARD)
    encoding = internal::RoundingControlValue::Downward;
  else if (mode == FE_UPWARD)
    encoding = internal::RoundingControlValue::Upward;
  else if (mode == FE_TOWARDZERO)
    encoding = internal::RoundingControlValue::TowardZero;
  else
    return 1; // To indicate failure

  // x87 control word: replace the 2-bit rounding field, keep all other bits.
  const uint16_t x87FieldMask = static_cast<uint16_t>(
      uint16_t(0x3) << internal::X87RoundingControlBitPosition);
  const uint16_t x87Field = static_cast<uint16_t>(
      encoding << internal::X87RoundingControlBitPosition);
  const uint16_t oldControlWord = internal::getX87ControlWord();
  internal::writeX87ControlWord(
      static_cast<uint16_t>((oldControlWord & ~x87FieldMask) | x87Field));

  // MXCSR: same encoding, different position of the field.
  const uint32_t mxcsrFieldMask = 0x3 << internal::MXCSRRoundingControlBitPosition;
  const uint32_t mxcsrField = encoding
                              << internal::MXCSRRoundingControlBitPosition;
  const uint32_t oldMXCSR = internal::getMXCSR();
  internal::writeMXCSR((oldMXCSR & ~mxcsrFieldMask) | mxcsrField);

  return 0;
}

namespace internal {

#ifdef _WIN32
// MSVC fenv.h defines a very simple representation of the floating point state
// which just consists of control and status words of the x87 unit.
// The 16-bit x87 words are held in 32-bit fields here; getEnv and setEnv
// access a fenv_t through this layout.
struct FPState {
  uint32_t ControlWord;
  uint32_t StatusWord;
};
#else
// Everywhere else the saved state is the x87 environment, as stored by
// getX87StateDescriptor, followed by the value of the MXCSR register. getEnv
// and setEnv access a fenv_t through this layout.
struct FPState {
  X87StateDescriptor X87Status;
  uint32_t MXCSR;
};
#endif // _WIN32

} // namespace internal

// getEnv and setEnv reinterpret_cast the caller's fenv_t pointer to an
// internal::FPState pointer, so the two types must at least agree in size.
static_assert(
    sizeof(fenv_t) == sizeof(internal::FPState),
    "Internal floating point state does not match the public fenv_t type.");

#ifdef _WIN32
// Saves the x87 control and status words into |*envp| (viewed through the
// Windows internal::FPState layout). Always returns 0.
static inline int getEnv(fenv_t *envp) {
  internal::FPState *state = reinterpret_cast<internal::FPState *>(envp);
  internal::X87StateDescriptor X87Status;
  internal::getX87StateDescriptor(X87Status);
  // The fnstenv instruction used to capture the environment masks all x87
  // exceptions as a side effect. Load the captured environment straight back
  // so that reading the environment does not change which exceptions trap.
  internal::writeX87StateDescriptor(X87Status);
  state->ControlWord = X87Status.ControlWord;
  state->StatusWord = X87Status.StatusWord;
  return 0;
}

// Installs the x87 control and status words saved in |*envp| (viewed through
// the Windows internal::FPState layout). Always returns 0.
static inline int setEnv(const fenv_t *envp) {
  const internal::FPState *state =
      reinterpret_cast<const internal::FPState *>(envp);
  // fldenv loads the whole x87 environment, not just the two words stored in
  // |*envp|. Start from the current environment so that the remaining fields
  // are loaded with their present values rather than with uninitialized
  // stack contents.
  internal::X87StateDescriptor X87Status;
  internal::getX87StateDescriptor(X87Status);
  // The saved words are held in 32-bit fields; only the low 16 bits are
  // meaningful for the x87 registers.
  X87Status.ControlWord = static_cast<uint16_t>(state->ControlWord);
  X87Status.StatusWord = static_cast<uint16_t>(state->StatusWord);
  internal::writeX87StateDescriptor(X87Status);
  return 0;
}
#else
// Saves the x87 environment and the MXCSR register into |*envp| (viewed
// through the internal::FPState layout). Always returns 0.
static inline int getEnv(fenv_t *envp) {
  internal::FPState *state = reinterpret_cast<internal::FPState *>(envp);
  internal::getX87StateDescriptor(state->X87Status);
  // The fnstenv instruction used to capture the environment masks all x87
  // exceptions as a side effect. Load the captured environment straight back
  // so that reading the environment does not change which exceptions trap.
  internal::writeX87StateDescriptor(state->X87Status);
  state->MXCSR = internal::getMXCSR();
  return 0;
}

static inline int setEnv(const fenv_t *envp) {
  const internal::FPState *state =
      reinterpret_cast<const internal::FPState *>(envp);
  internal::writeX87StateDescriptor(state->X87Status);
  internal::writeMXCSR(state->MXCSR);
  return 0;
}
#endif

} // namespace fputil
} // namespace __llvm_libc

#endif // LLVM_LIBC_UTILS_FPUTIL_X86_64_FENVIMPL_H