1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143
|
//===-- Single-precision tan function -------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "src/math/tanf.h"
#include "sincosf_utils.h"
#include "src/__support/FPUtil/FEnvImpl.h"
#include "src/__support/FPUtil/FPBits.h"
#include "src/__support/FPUtil/PolyEval.h"
#include "src/__support/FPUtil/except_value_utils.h"
#include "src/__support/FPUtil/multiply_add.h"
#include "src/__support/FPUtil/nearest_integer.h"
#include "src/__support/common.h"
#include "src/__support/macros/config.h"
#include "src/__support/macros/optimization.h" // LIBC_UNLIKELY
#include "src/__support/macros/properties/cpu_features.h" // LIBC_TARGET_CPU_HAS_FMA
#include <errno.h>
namespace LIBC_NAMESPACE_DECL {
// Table of hard-to-round inputs for tanf.
//
// Each row lists, in order:
//   1. the bit pattern of the input x,
//   2. the bit pattern of tan(x) rounded toward zero (RZ),
//   3. the offset applied to (2) when rounding upward (RU),
//   4. the offset applied to (2) when rounding downward (RD),
//   5. the offset applied to (2) when rounding to nearest (RN).
constexpr size_t N_EXCEPTS = 6;

constexpr fputil::ExceptValues<float, N_EXCEPTS> TANF_EXCEPTS{{
    // x = 0x1.ada6aap27  ->  tan(x) = 0x1.e80304p-3 (RZ)
    {0x4d56'd355, 0x3e74'0182, 1, 0, 0},
    // x = 0x1.862064p33  ->  tan(x) = -0x1.8dee56p-3 (RZ)
    {0x5043'1032, 0xbe46'f72b, 0, 1, 1},
    // x = 0x1.af61dap48  ->  tan(x) = 0x1.60d1c6p-2 (RZ)
    {0x57d7'b0ed, 0x3eb0'68e3, 1, 0, 1},
    // x = 0x1.0088bcp52  ->  tan(x) = 0x1.ca1edp0 (RZ)
    {0x5980'445e, 0x3fe5'0f68, 1, 0, 0},
    // x = 0x1.f90dfcp72  ->  tan(x) = 0x1.597f9cp-1 (RZ)
    {0x63fc'86fe, 0x3f2c'bfce, 1, 0, 0},
    // x = 0x1.a6ce12p86  ->  tan(x) = -0x1.c5612ep-1 (RZ)
    {0x6ad3'6709, 0xbf62'b097, 0, 1, 0},
}};
LLVM_LIBC_FUNCTION(float, tanf, (float x)) {
using FPBits = typename fputil::FPBits<float>;
FPBits xbits(x);
bool x_sign = xbits.uintval() >> 31;
uint32_t x_abs = xbits.uintval() & 0x7fff'ffffU;
// |x| < pi/32
if (LIBC_UNLIKELY(x_abs <= 0x3dc9'0fdbU)) {
double xd = static_cast<double>(x);
// |x| < 0x1.0p-12f
if (LIBC_UNLIKELY(x_abs < 0x3980'0000U)) {
if (LIBC_UNLIKELY(x_abs == 0U)) {
// For signed zeros.
return x;
}
// When |x| < 2^-12, the relative error of the approximation tan(x) ~ x
// is:
// |tan(x) - x| / |tan(x)| < |x^3| / (3|x|)
// = x^2 / 3
// < 2^-25
// < epsilon(1)/2.
// So the correctly rounded values of tan(x) are:
// = x + sign(x)*eps(x) if rounding mode = FE_UPWARD and x is positive,
// or (rounding mode = FE_DOWNWARD and x is
// negative),
// = x otherwise.
// To simplify the rounding decision and make it more efficient, we use
// fma(x, 2^-25, x) instead.
// Note: to use the formula x + 2^-25*x to decide the correct rounding, we
// do need fma(x, 2^-25, x) to prevent underflow caused by 2^-25*x when
// |x| < 2^-125. For targets without FMA instructions, we simply use
// double for intermediate results as it is more efficient than using an
// emulated version of FMA.
#if defined(LIBC_TARGET_CPU_HAS_FMA)
return fputil::multiply_add(x, 0x1.0p-25f, x);
#else
return static_cast<float>(fputil::multiply_add(xd, 0x1.0p-25, xd));
#endif // LIBC_TARGET_CPU_HAS_FMA
}
// |x| < pi/32
double xsq = xd * xd;
// Degree-9 minimax odd polynomial of tan(x) generated by Sollya with:
// > P = fpminimax(tan(x)/x, [|0, 2, 4, 6, 8|], [|1, D...|], [0, pi/32]);
double result =
fputil::polyeval(xsq, 1.0, 0x1.555555553d022p-2, 0x1.111111ce442c1p-3,
0x1.ba180a6bbdecdp-5, 0x1.69c0a88a0b71fp-6);
return static_cast<float>(xd * result);
}
// Check for exceptional values
if (LIBC_UNLIKELY(x_abs == 0x3f8a1f62U)) {
// |x| = 0x1.143ec4p0
float sign = x_sign ? -1.0f : 1.0f;
// volatile is used to prevent compiler (gcc) from optimizing the
// computation, making the results incorrect in different rounding modes.
volatile float tmp = 0x1.ddf9f4p0f;
tmp = fputil::multiply_add(sign, tmp, sign * 0x1.1p-24f);
return tmp;
}
// |x| > 0x1.ada6a8p+27f
if (LIBC_UNLIKELY(x_abs > 0x4d56'd354U)) {
// Inf or NaN
if (LIBC_UNLIKELY(x_abs >= 0x7f80'0000U)) {
if (x_abs == 0x7f80'0000U) {
fputil::set_errno_if_required(EDOM);
fputil::raise_except_if_required(FE_INVALID);
}
return x + FPBits::quiet_nan().get_val();
}
// Other large exceptional values
if (auto r = TANF_EXCEPTS.lookup_odd(x_abs, x_sign);
LIBC_UNLIKELY(r.has_value()))
return r.value();
}
// For |x| >= pi/32, we use the definition of tan(x) function:
// tan(x) = sin(x) / cos(x)
// The we follow the same computations of sin(x) and cos(x) as sinf, cosf,
// and sincosf.
double xd = static_cast<double>(x);
double sin_k, cos_k, sin_y, cosm1_y;
sincosf_eval(xd, x_abs, sin_k, cos_k, sin_y, cosm1_y);
// tan(x) = sin(x) / cos(x)
// = (sin_y * cos_k + cos_y * sin_k) / (cos_y * cos_k - sin_y * sin_k)
using fputil::multiply_add;
return static_cast<float>(
multiply_add(sin_y, cos_k, multiply_add(cosm1_y, sin_k, sin_k)) /
multiply_add(sin_y, -sin_k, multiply_add(cosm1_y, cos_k, cos_k)));
}
} // namespace LIBC_NAMESPACE_DECL
|