File: pow_helpers_fp64.cl

package info (click to toggle)
pocl 6.0-6
  • links: PTS, VCS
  • area: main
  • in suites: trixie
  • size: 25,304 kB
  • sloc: lisp: 149,513; ansic: 103,778; cpp: 54,947; python: 1,513; sh: 949; ruby: 255; pascal: 226; tcl: 180; makefile: 173; java: 72; xml: 49
file content (65 lines) | stat: -rw-r--r-- 2,675 bytes parent folder | download | duplicates (5)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
/*===--------------------------------------------------------------------------
 *                   ROCm Device Libraries
 *
 * This file is distributed under the University of Illinois Open Source
 * License. See ROCM_LICENSE.TXT for details.
 *===------------------------------------------------------------------------*/

#include "ocml_helpers.h"

/*
 * epln: logarithm kernel returning its result as a v2type pair.
 *
 * The input is split into a mantissa m and an exponent e; when m is below
 * 2/3 the mantissa is scaled up by one binade and e is reduced by one.
 * With x = (m - 1) / (m + 1) the result is then assembled as
 *
 *     ln(2)*e + 2*x + x^3 * (c3 + x^2 * p)
 *
 * where p is a degree-8 polynomial evaluated (Horner form) on the .hi part
 * of x^2, and ln(2) and c3 are supplied as two-part constants via con().
 *
 * NOTE(review): v2type is presumably a hi/lo extended-precision pair and
 * add/fadd/mul/div/sqr/ldx/con its arithmetic helpers from ocml_helpers.h;
 * their exact semantics are not visible here -- confirm against the header.
 */
v2type _CL_OVERLOADABLE
MATH_PRIVATE(epln)(vtype a)
{
    // Mantissa/exponent split, with the "m < 2/3" renormalisation step.
    vtype mant = BUILTIN_FREXP_MANT_F64(a);
    itype bump = (mant < (vtype)(2.0/3.0)) ? (itype)1 : (itype)0;
    mant = BUILTIN_FLDEXP_F64(mant, convert_inttype(bump));
    itype expo = BUILTIN_FREXP_EXP_F64(a) - bump;

    // x = (m - 1) / (m + 1) as a pair, plus its square.
    vtype numer = mant - (vtype)1.0;
    v2type x = div(numer, add(mant, (vtype)1.0));
    v2type xsq = sqr(x);
    vtype t = xsq.hi;

    // Horner evaluation of p(t), highest-order coefficient first.
    vtype p = MATH_MAD(t, (vtype)0x1.dee674222de17p-4, (vtype)0x1.a6564968915a9p-4);
    p = MATH_MAD(t, p, (vtype)0x1.e25e43abe935ap-4);
    p = MATH_MAD(t, p, (vtype)0x1.110ef47e6c9c2p-3);
    p = MATH_MAD(t, p, (vtype)0x1.3b13bcfa74449p-3);
    p = MATH_MAD(t, p, (vtype)0x1.745d171bf3c30p-3);
    p = MATH_MAD(t, p, (vtype)0x1.c71c71c7792cep-3);
    p = MATH_MAD(t, p, (vtype)0x1.24924924920dap-2);
    p = MATH_MAD(t, p, (vtype)0x1.999999999999cp-2);

    // ln(2)*e + 2*x + x^3(c3 + x^2*p)
    return add(mul(con((vtype)0x1.62e42fefa39efp-1, (vtype)0x1.abc9e3b39803fp-56),
                   convert_vtype(expo)),
               fadd(ldx(x, 1),
                    mul(mul(xsq, x),
                        fadd(con((vtype)0x1.5555555555555p-1, (vtype)0x1.543b0d5df274dp-55),
                             mul(xsq, p)))));
}

/*
 * expep: exponential kernel taking a v2type pair and returning a plain vtype.
 *
 * Steps, as performed below:
 *   1. n = rint(x.hi * log2(e)).
 *   2. t = x - n*c_hi - n*c_mid - n*c_lo, where the three constants are a
 *      hi/mid/lo split of ln(2); t is kept as a pair.
 *   3. p = degree-9 polynomial in t.hi (Horner form); r = t + t^2 * p.
 *   4. z = ldexp(1 + r.hi, n).
 *   5. z is forced to the PINFBITPATT_DP64 bit pattern when x.hi > 710 and
 *      to 0 when x.hi < -745.
 *
 * NOTE(review): sub/fsub/fadd/mul/sqr are pair-arithmetic helpers from
 * ocml_helpers.h and PINFBITPATT_DP64 is presumably the +infinity bit
 * pattern; neither is visible in this file -- confirm against the headers.
 */
vtype _CL_OVERLOADABLE
MATH_PRIVATE(expep)(v2type x)
{
    // Nearest-integer multiple of ln(2) contained in the argument.
    vtype n = BUILTIN_RINT_F64(x.hi * 0x1.71547652b82fep+0);

    // Remove n*ln(2) in three pieces, largest first.
    vtype n_hi = n*0x1.62e42fefa3000p-1;
    vtype n_mid = n*0x1.3de6af278e000p-42;
    vtype n_lo = n*0x1.9cc01f97b57a0p-83;
    v2type t = fsub(fsub(sub(x, n_hi), n_mid), n_lo);

    // Horner evaluation of p(t.hi), highest-order coefficient first.
    vtype u = t.hi;
    vtype p = MATH_MAD(u, (vtype)0x1.ade156a5dcb37p-26, (vtype)0x1.28af3fca7ab0cp-22);
    p = MATH_MAD(u, p, (vtype)0x1.71dee623fde64p-19);
    p = MATH_MAD(u, p, (vtype)0x1.a01997c89e6b0p-16);
    p = MATH_MAD(u, p, (vtype)0x1.a01a014761f6ep-13);
    p = MATH_MAD(u, p, (vtype)0x1.6c16c1852b7b0p-10);
    p = MATH_MAD(u, p, (vtype)0x1.1111111122322p-7);
    p = MATH_MAD(u, p, (vtype)0x1.55555555502a1p-5);
    p = MATH_MAD(u, p, (vtype)0x1.5555555555511p-3);
    p = MATH_MAD(u, p, (vtype)0x1.000000000000bp-1);

    // 1 + t + t^2 * p, then scale by 2^n.
    v2type r = fadd(t, mul(sqr(t), p));
    vtype scaled = BUILTIN_FLDEXP_F64((vtype)1.0 + r.hi, convert_inttype(n));

    // Range clamps; ternaries are kept so the code also works for vector vtype.
    vtype clamped_hi = (x.hi > (vtype)710.0) ? as_vtype((utype)PINFBITPATT_DP64) : scaled;
    vtype result = (x.hi < (vtype)-745.0) ? (vtype)0.0 : clamped_hi;

    return result;
}