1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194
|
//===-- lib/divsf3.c - Single-precision division ------------------*- C -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements single-precision soft-float division
// with the IEEE-754 default rounding (to nearest, ties to even).
//
// For simplicity, this implementation currently flushes denormals to zero.
// It should be a fairly straightforward exercise to implement gradual
// underflow with correct rounding.
//
//===----------------------------------------------------------------------===//
#define SINGLE_PRECISION
#include "fp_lib.h"
COMPILER_RT_ABI fp_t __divsf3(fp_t a, fp_t b) {
const unsigned int aExponent = toRep(a) >> significandBits & maxExponent;
const unsigned int bExponent = toRep(b) >> significandBits & maxExponent;
const rep_t quotientSign = (toRep(a) ^ toRep(b)) & signBit;
rep_t aSignificand = toRep(a) & significandMask;
rep_t bSignificand = toRep(b) & significandMask;
int scale = 0;
// Detect if a or b is zero, denormal, infinity, or NaN.
if (aExponent - 1U >= maxExponent - 1U ||
bExponent - 1U >= maxExponent - 1U) {
const rep_t aAbs = toRep(a) & absMask;
const rep_t bAbs = toRep(b) & absMask;
// NaN / anything = qNaN
if (aAbs > infRep)
return fromRep(toRep(a) | quietBit);
// anything / NaN = qNaN
if (bAbs > infRep)
return fromRep(toRep(b) | quietBit);
if (aAbs == infRep) {
// infinity / infinity = NaN
if (bAbs == infRep)
return fromRep(qnanRep);
// infinity / anything else = +/- infinity
else
return fromRep(aAbs | quotientSign);
}
// anything else / infinity = +/- 0
if (bAbs == infRep)
return fromRep(quotientSign);
if (!aAbs) {
// zero / zero = NaN
if (!bAbs)
return fromRep(qnanRep);
// zero / anything else = +/- zero
else
return fromRep(quotientSign);
}
// anything else / zero = +/- infinity
if (!bAbs)
return fromRep(infRep | quotientSign);
// One or both of a or b is denormal. The other (if applicable) is a
// normal number. Renormalize one or both of a and b, and set scale to
// include the necessary exponent adjustment.
if (aAbs < implicitBit)
scale += normalize(&aSignificand);
if (bAbs < implicitBit)
scale -= normalize(&bSignificand);
}
// Set the implicit significand bit. If we fell through from the
// denormal path it was already set by normalize( ), but setting it twice
// won't hurt anything.
aSignificand |= implicitBit;
bSignificand |= implicitBit;
int quotientExponent = aExponent - bExponent + scale;
// 0x7504F333 / 2^32 + 1 = 3/4 + 1/sqrt(2)
// Align the significand of b as a Q31 fixed-point number in the range
// [1, 2.0) and get a Q32 approximate reciprocal using a small minimax
// polynomial approximation: reciprocal = 3/4 + 1/sqrt(2) - b/2. This
// is accurate to about 3.5 binary digits.
uint32_t q31b = bSignificand << 8;
uint32_t reciprocal = UINT32_C(0x7504f333) - q31b;
// Now refine the reciprocal estimate using a Newton-Raphson iteration:
//
// x1 = x0 * (2 - x0 * b)
//
// This doubles the number of correct binary digits in the approximation
// with each iteration.
uint32_t correction;
correction = -((uint64_t)reciprocal * q31b >> 32);
reciprocal = (uint64_t)reciprocal * correction >> 31;
correction = -((uint64_t)reciprocal * q31b >> 32);
reciprocal = (uint64_t)reciprocal * correction >> 31;
correction = -((uint64_t)reciprocal * q31b >> 32);
reciprocal = (uint64_t)reciprocal * correction >> 31;
// Adust the final 32-bit reciprocal estimate downward to ensure that it is
// strictly smaller than the infinitely precise exact reciprocal. Because
// the computation of the Newton-Raphson step is truncating at every step,
// this adjustment is small; most of the work is already done.
reciprocal -= 2;
// The numerical reciprocal is accurate to within 2^-28, lies in the
// interval [0x1.000000eep-1, 0x1.fffffffcp-1], and is strictly smaller
// than the true reciprocal of b. Multiplying a by this reciprocal thus
// gives a numerical q = a/b in Q24 with the following properties:
//
// 1. q < a/b
// 2. q is in the interval [0x1.000000eep-1, 0x1.fffffffcp0)
// 3. The error in q is at most 2^-24 + 2^-27 -- the 2^24 term comes
// from the fact that we truncate the product, and the 2^27 term
// is the error in the reciprocal of b scaled by the maximum
// possible value of a. As a consequence of this error bound,
// either q or nextafter(q) is the correctly rounded.
rep_t quotient = (uint64_t)reciprocal * (aSignificand << 1) >> 32;
// Two cases: quotient is in [0.5, 1.0) or quotient is in [1.0, 2.0).
// In either case, we are going to compute a residual of the form
//
// r = a - q*b
//
// We know from the construction of q that r satisfies:
//
// 0 <= r < ulp(q)*b
//
// If r is greater than 1/2 ulp(q)*b, then q rounds up. Otherwise, we
// already have the correct result. The exact halfway case cannot occur.
// We also take this time to right shift quotient if it falls in the [1,2)
// range and adjust the exponent accordingly.
rep_t residual;
if (quotient < (implicitBit << 1)) {
residual = (aSignificand << 24) - quotient * bSignificand;
quotientExponent--;
} else {
quotient >>= 1;
residual = (aSignificand << 23) - quotient * bSignificand;
}
const int writtenExponent = quotientExponent + exponentBias;
if (writtenExponent >= maxExponent) {
// If we have overflowed the exponent, return infinity.
return fromRep(infRep | quotientSign);
}
else if (writtenExponent < 1) {
if (writtenExponent == 0) {
// Check whether the rounded result is normal.
const bool round = (residual << 1) > bSignificand;
// Clear the implicit bit.
rep_t absResult = quotient & significandMask;
// Round.
absResult += round;
if (absResult & ~significandMask) {
// The rounded result is normal; return it.
return fromRep(absResult | quotientSign);
}
}
// Flush denormals to zero. In the future, it would be nice to add
// code to round them correctly.
return fromRep(quotientSign);
}
else {
const bool round = (residual << 1) > bSignificand;
// Clear the implicit bit.
rep_t absResult = quotient & significandMask;
// Insert the exponent.
absResult |= (rep_t)writtenExponent << significandBits;
// Round.
absResult += round;
// Insert the sign and return.
return fromRep(absResult | quotientSign);
}
}
#if defined(__ARM_EABI__)
#if defined(COMPILER_RT_ARMHF_TARGET)
AEABI_RTABI fp_t __aeabi_fdiv(fp_t a, fp_t b) { return __divsf3(a, b); }
#else
COMPILER_RT_ALIAS(__divsf3, __aeabi_fdiv)
#endif
#endif
|