File: tan_sve.c

package info (click to toggle)
glibc 2.41-11
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 300,376 kB
  • sloc: ansic: 1,050,583; asm: 238,243; makefile: 20,379; python: 13,537; sh: 11,827; cpp: 5,197; awk: 1,795; perl: 317; yacc: 292; pascal: 182; sed: 19
file content (135 lines) | stat: -rw-r--r-- 5,123 bytes parent folder | download | duplicates (5)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
/* Double-precision vector (SVE) tan function

   Copyright (C) 2023-2025 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <https://www.gnu.org/licenses/>.  */

#include "sv_math.h"
#include "poly_sve_f64.h"

static const struct data
{
  double c2, c4, c6, c8;
  double poly_1357[4];
  double c0, inv_half_pi;
  double half_pi_hi, half_pi_lo, range_val;
} data = {
  /* Polynomial generated with FPMinimax.  */
  .c2 = 0x1.ba1ba1bb46414p-5,
  .c4 = 0x1.226e5e5ecdfa3p-7,
  .c6 = 0x1.7ea75d05b583ep-10,
  .c8 = 0x1.4e4fd14147622p-12,
  .poly_1357 = { 0x1.1111111110a63p-3, 0x1.664f47e5b5445p-6,
		 0x1.d6c7ddbf87047p-9, 0x1.289f22964a03cp-11 },
  .c0 = 0x1.5555555555556p-2,
  .inv_half_pi = 0x1.45f306dc9c883p-1,
  .half_pi_hi = 0x1.921fb54442d18p0,
  .half_pi_lo = 0x1.1a62633145c07p-54,
  .range_val = 0x1p23,
};

static svfloat64_t NOINLINE
special_case (svfloat64_t x, svfloat64_t p, svfloat64_t q, svbool_t pg,
	      svbool_t special)
{
  svbool_t use_recip = svcmpeq (
      pg, svand_x (pg, svreinterpret_u64 (svcvt_s64_x (pg, q)), 1), 0);

  svfloat64_t n = svmad_x (pg, p, p, -1);
  svfloat64_t d = svmul_x (svptrue_b64 (), p, 2);
  svfloat64_t swap = n;
  n = svneg_m (n, use_recip, d);
  d = svsel (use_recip, swap, d);
  svfloat64_t y = svdiv_x (svnot_z (pg, special), n, d);
  return sv_call_f64 (tan, x, y, special);
}

/* Vector approximation for double-precision tan.
   Maximum measured error is 3.48 ULP:
   _ZGVsMxv_tan(0x1.4457047ef78d8p+20) got -0x1.f6ccd8ecf7dedp+37
				      want -0x1.f6ccd8ecf7deap+37.  */
svfloat64_t SV_NAME_D1 (tan) (svfloat64_t x, svbool_t pg)
{
  const struct data *dat = ptr_barrier (&data);
  svfloat64_t half_pi_c0 = svld1rq (svptrue_b64 (), &dat->c0);
  /* q = nearest integer to 2 * x / pi.  */
  svfloat64_t q = svmul_lane (x, half_pi_c0, 1);
  q = svrinta_x (pg, q);

  /* Use q to reduce x to r in [-pi/4, pi/4], by:
     r = x - q * pi/2, in extended precision.  */
  svfloat64_t r = x;
  svfloat64_t half_pi = svld1rq (svptrue_b64 (), &dat->half_pi_hi);
  r = svmls_lane (r, q, half_pi, 0);
  r = svmls_lane (r, q, half_pi, 1);
  /* Further reduce r to [-pi/8, pi/8], to be reconstructed using double angle
     formula.  */
  r = svmul_x (svptrue_b64 (), r, 0.5);

  /* Approximate tan(r) using order 8 polynomial.
     tan(x) is odd, so polynomial has the form:
     tan(x) ~= x + C0 * x^3 + C1 * x^5 + C3 * x^7 + ...
     Hence we first approximate P(r) = C1 + C2 * r^2 + C3 * r^4 + ...
     Then compute the approximation by:
     tan(r) ~= r + r^3 * (C0 + r^2 * P(r)).  */

  svfloat64_t r2 = svmul_x (svptrue_b64 (), r, r);
  svfloat64_t r4 = svmul_x (svptrue_b64 (), r2, r2);
  svfloat64_t r8 = svmul_x (svptrue_b64 (), r4, r4);
  /* Use offset version coeff array by 1 to evaluate from C1 onwards.  */
  svfloat64_t C_24 = svld1rq (svptrue_b64 (), &dat->c2);
  svfloat64_t C_68 = svld1rq (svptrue_b64 (), &dat->c6);

  /* Use offset version coeff array by 1 to evaluate from C1 onwards.  */
  svfloat64_t p01 = svmla_lane (sv_f64 (dat->poly_1357[0]), r2, C_24, 0);
  svfloat64_t p23 = svmla_lane_f64 (sv_f64 (dat->poly_1357[1]), r2, C_24, 1);
  svfloat64_t p03 = svmla_x (pg, p01, p23, r4);

  svfloat64_t p45 = svmla_lane (sv_f64 (dat->poly_1357[2]), r2, C_68, 0);
  svfloat64_t p67 = svmla_lane (sv_f64 (dat->poly_1357[3]), r2, C_68, 1);
  svfloat64_t p47 = svmla_x (pg, p45, p67, r4);

  svfloat64_t p = svmla_x (pg, p03, p47, r8);

  svfloat64_t z = svmul_x (svptrue_b64 (), p, r);
  z = svmul_x (svptrue_b64 (), r2, z);
  z = svmla_lane (z, r, half_pi_c0, 0);
  p = svmla_x (pg, r, r2, z);

  /* Recombination uses double-angle formula:
     tan(2x) = 2 * tan(x) / (1 - (tan(x))^2)
     and reciprocity around pi/2:
     tan(x) = 1 / (tan(pi/2 - x))
     to assemble result using change-of-sign and conditional selection of
     numerator/denominator dependent on odd/even-ness of q (quadrant).  */

  /* Invert condition to catch NaNs and Infs as well as large values.  */
  svbool_t special = svnot_z (pg, svaclt (pg, x, dat->range_val));

  if (__glibc_unlikely (svptest_any (pg, special)))
    {
      return special_case (x, p, q, pg, special);
    }
  svbool_t use_recip = svcmpeq (
      pg, svand_x (pg, svreinterpret_u64 (svcvt_s64_x (pg, q)), 1), 0);

  svfloat64_t n = svmad_x (pg, p, p, -1);
  svfloat64_t d = svmul_x (svptrue_b64 (), p, 2);
  svfloat64_t swap = n;
  n = svneg_m (n, use_recip, d);
  d = svsel (use_recip, swap, d);
  return svdiv_x (pg, n, d);
}