File: pow_helpers_fp32.cl

package info (click to toggle)
pocl 6.0-6
  • links: PTS, VCS
  • area: main
  • in suites: trixie
  • size: 25,304 kB
  • sloc: lisp: 149,513; ansic: 103,778; cpp: 54,947; python: 1,513; sh: 949; ruby: 255; pascal: 226; tcl: 180; makefile: 173; java: 72; xml: 49
file content (58 lines) | stat: -rw-r--r-- 1,956 bytes parent folder | download | duplicates (5)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
/*===--------------------------------------------------------------------------
 *                   ROCm Device Libraries
 *
 * This file is distributed under the University of Illinois Open Source
 * License. See ROCM_LICENSE.TXT for details.
 *===------------------------------------------------------------------------*/

#include "ocml_helpers.h"

/* Extended-precision logarithm helper (fp32).
 *
 * Splits `a` into a mantissa and exponent, forms q = (mant - 1)/(mant + 1)
 * as a v2type pair, and evaluates
 *
 *     ln(2)*expo + 2*q + q^3 * (c3 + q^2 * poly(q^2))
 *
 * with the pair-arithmetic helpers from ocml_helpers.h.  The result is
 * returned as a v2type pair (presumably a hi/lo split of ln(a) -- only the
 * .hi member is referenced in this file).
 *
 * NOTE(review): no special-casing of zero, negative, infinite or NaN input
 * is done here; callers are presumably expected to handle those.
 */
v2type _CL_OVERLOADABLE
MATH_PRIVATE(epln)(vtype a)
{
    // When the mantissa is below 2/3, scale it by 2^1 and take one off the
    // exponent to compensate.
    vtype mant = BUILTIN_FREXP_MANT_F32(a);
    itype bump = (mant < (vtype)(2.0f/3.0f)) ? (itype)1 : (itype)0;
    itype expo = BUILTIN_FREXP_EXP_F32(a) - bump;
    mant = BUILTIN_FLDEXP_F32(mant, bump);

    // q = (mant - 1) / (mant + 1) and its square, both as pairs.
    v2type q = div(mant - (vtype)1.0f, add(mant, (vtype)1.0f));
    v2type q2 = sqr(q);
    vtype u = q2.hi;

    // Polynomial in q^2 (Horner form); only the high part of q^2 is used.
    vtype poly = MATH_MAD(u, (vtype)0x1.ed89c2p-3f, (vtype)0x1.23e988p-2f);
    poly = MATH_MAD(u, poly, (vtype)0x1.999bdep-2f);

    // Two-part constants: ln(2) and the q^3 coefficient c3 (about 2/3).
    v2type ln2 = con((vtype)0x1.62e430p-1f, (vtype)-0x1.05c610p-29f);
    v2type c3 = con((vtype)0x1.555554p-1f, (vtype)0x1.e72020p-29f);

    // ln(2)*e + 2*x + x^3(c3 + x^2*p)
    v2type exp_term = mul(ln2, convert_vtype(expo));
    v2type cubic_term = mul(mul(q2, q), fadd(c3, mul(q2, poly)));

    return add(exp_term, fadd(ldx(q, 1), cubic_term));
}


/* Exponential of an extended-precision (v2type pair) argument, fp32 result.
 *
 * Steps:
 *   1. k   = rint(x.hi * log2(e))
 *   2. red = x - k*ln(2), with ln(2) subtracted in three pieces
 *   3. red + red^2 * poly(red.hi)   (approximately exp(red) - 1)
 *   4. result = (1 + that).hi scaled by 2^k
 *   5. clamp: x.hi > 89 gives PINFBITPATT_SP32 (presumably +infinity),
 *             x.hi < -104 gives 0.
 */
vtype _CL_OVERLOADABLE
MATH_PRIVATE(expep)(v2type x)
{
    // 0x1.715476p+0f is approximately log2(e).
    vtype k = BUILTIN_RINT_F32(x.hi * 0x1.715476p+0f);

    // Remove k*ln(2) from x using a three-part split of ln(2).
    v2type red = sub(x, k*0x1.62e400p-1f);
    red = fsub(red, k*0x1.7f7800p-20f);
    red = fsub(red, k*0x1.473de6p-34f);

    // Polynomial in the high part of the reduced argument (Horner form).
    vtype h = red.hi;
    vtype poly = MATH_MAD(h, (vtype)0x1.6850e4p-10f, (vtype)0x1.123bccp-7f);
    poly = MATH_MAD(h, poly, (vtype)0x1.555b98p-5f);
    poly = MATH_MAD(h, poly, (vtype)0x1.55548ep-3f);
    poly = MATH_MAD(h, poly, (vtype)0x1.fffff8p-2f);

    // red + red^2 * poly, then add 1 and keep only the high part.
    v2type tail = fadd(red, mul(sqr(red), poly));
    vtype result = (vtype)1.0f + tail.hi;

    // Apply the 2^k scaling.
    result = BUILTIN_FLDEXP_F32(result, convert_inttype(k));

    // Out-of-range inputs: the underflow check is applied last.
    result = (x.hi > (vtype)89.0f) ? as_vtype((utype)PINFBITPATT_SP32) : result;
    result = (x.hi < (vtype)-104.0f) ? (vtype)0.0f : result;

    return result;
}