File: ocml_helpers_fp32.cl

package info (click to toggle)
pocl 1.6-5
  • links: PTS, VCS
  • area: main
  • in suites: bullseye
  • size: 17,816 kB
  • sloc: lisp: 135,476; ansic: 64,403; cpp: 32,196; vhdl: 1,040; sh: 382; python: 336; makefile: 151; pascal: 140; java: 72; xml: 49
file content (40 lines) | stat: -rw-r--r-- 1,166 bytes parent folder | download | duplicates (5)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
/*===--------------------------------------------------------------------------
 *                   ROCm Device Libraries
 *
 * This file is distributed under the University of Illinois Open Source
 * License. See ROCM_LICENSE.TXT for details.
 *===------------------------------------------------------------------------*/

// Pull in the shared helper header and implementation while
// FLOAT_SPECIALIZATION is defined, then drop the define again.
// NOTE(review): presumably this selects the single-precision meaning of
// vtype/utype and the as_* helpers used below -- confirm in ocml_helpers.h.
#define FLOAT_SPECIALIZATION
#include "ocml_helpers.h"
#include "ocml_helpers_impl.cl"
#undef FLOAT_SPECIALIZATION

// FULL_MUL(A, B, CHI, CLO): expand the product A * B into two vtype parts.
//   CHI is assigned the plain product A * B.
//   CLO is assigned the sum of the four partial products of the split
//       operands with CHI subtracted, accumulated through MATH_MAD
//       (assumes MATH_MAD(x, y, z) evaluates x * y + z -- confirm in the
//       helper headers).
// A and B are each split into a "hi" part, obtained by clearing the low
// 12 bits of the value's bit pattern (mask 0xfffff000U), and a "lo" part
// holding the remainder.
// The arguments are pasted unparenthesized and A, B and CHI are used more
// than once, so every argument must be a plain variable name.
#define FULL_MUL(A, B, CHI, CLO) \
    do { \
        vtype fm_a_hi = as_vtype(as_utype(A) & (utype)0xfffff000U); \
        vtype fm_b_hi = as_vtype(as_utype(B) & (utype)0xfffff000U); \
        vtype fm_a_lo = A - fm_a_hi; \
        vtype fm_b_lo = B - fm_b_hi; \
        vtype fm_acc; \
        CHI = A * B; \
        fm_acc = MATH_MAD(fm_a_hi, fm_b_hi, -CHI); \
        fm_acc = MATH_MAD(fm_a_hi, fm_b_lo, fm_acc); \
        fm_acc = MATH_MAD(fm_a_lo, fm_b_hi, fm_acc); \
        CLO = MATH_MAD(fm_a_lo, fm_b_lo, fm_acc); \
    } while (0)


// fnma: negated multiply-add, i.e. an evaluation of c - a * b.
//
// When HAVE_FMA32 holds, the value comes from one BUILTIN_FMA_F32 call on
// (-a, b, c). Otherwise a * b is first expanded by FULL_MUL into a two-part
// product, which is then subtracted from c with a correction step.
OCML_ATTR vtype
fnma(vtype a, vtype b, vtype c)
{
    if (HAVE_FMA32) {
        return BUILTIN_FMA_F32(-a, b, c);
    }

    // prod_hi receives a * b; prod_lo receives the low-order part that
    // FULL_MUL computes alongside it.
    vtype prod_hi, prod_lo;
    FULL_MUL(a, b, prod_hi, prod_lo);

    // First estimate of c - a * b using only the leading product term.
    vtype diff = c - prod_hi;

    // Correction: what the subtraction above left behind, minus the
    // low-order product term. The operation order is significant here.
    vtype corr = (c - diff) - prod_hi;
    corr = corr - prod_lo;

    return corr + diff;
}

// FULL_MUL is only a local helper for fnma; remove it so it is not visible
// to any code that follows.
#undef FULL_MUL