File: acos_s_la.cl

package info (click to toggle)
intel-graphics-compiler 1.0.12504.6-1+deb12u1
  • links: PTS, VCS
  • area: main
  • in suites: bookworm
  • size: 83,912 kB
  • sloc: cpp: 910,147; lisp: 202,655; ansic: 15,197; python: 4,025; yacc: 2,241; lex: 1,570; pascal: 244; sh: 104; makefile: 25
file content (126 lines) | stat: -rw-r--r-- 3,806 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
/*========================== begin_copyright_notice ============================

Copyright (C) 2020-2021 Intel Corporation

SPDX-License-Identifier: MIT

============================= end_copyright_notice ===========================*/

#include "../imf.h"
#pragma OPENCL FP_CONTRACT OFF
/*
 * Layout of the constant table used by the single-precision acos code.
 *
 * Every member is an unsigned int holding a raw bit pattern (a mask or the
 * encoding of a float).  The table instance in this file is filled with a
 * positional initializer, so the member order below must not change.
 */
typedef struct
{
    /* Scalar words, first group. */
    unsigned int SgnBit, OneHalf, sRsqrtMsk, SmallNorm;
    unsigned int MOne, HalfMask, SQMask, Two;

    /* Coefficient arrays. */
    unsigned int sqrt_coeff[2], poly_coeff[5];

    /* Names suggest high/low words of pi/2 and of pi. */
    unsigned int Pi2H, Pi2L, PiH, PiL;

    /* Scalar words, second group. */
    unsigned int Zero, SgnMask, NanMask;

    /* Trailing coefficient array. */
    unsigned int ep_coeff[3];
} __internal_sacos_la_data_t;
/*
 * Constant table instance for the single-precision acos code.
 *
 * Values are raw 32-bit patterns given positionally, in the member order of
 * __internal_sacos_la_data_t; the trailing comment on each line names the
 * member that the value initializes.
 */
static __constant __internal_sacos_la_data_t __internal_sacos_la_data = {
    0x80000000u,                    /* SgnBit                          */
    0x3f000000u,                    /* OneHalf   (encoding of 0.5f)    */
    0xfffff000u,                    /* sRsqrtMsk                       */
    0x2f800000u,                    /* SmallNorm (encoding of 2^-32)   */
    0xbf800000u,                    /* MOne      (encoding of -1.0f)   */
    0xffffe000u,                    /* HalfMask                        */
    0xfffff800u,                    /* SQMask                          */
    0x40000000u,                    /* Two       (encoding of 2.0f)    */
    { 0xbdc00004u, 0x3e800001u },   /* sqrt_coeff[0..1]                */
    { 0x3d2edc07u, 0x3cc32a6bu, 0x3d3a9ab4u,
      0x3d997c12u, 0x3e2aaaffu },   /* poly_coeff[0..4]                */
    0x3fc90fdbu,                    /* Pi2H                            */
    0xb33bbd2eu,                    /* Pi2L                            */
    0x40490fdbu,                    /* PiH                             */
    0xb3bbbd2eu,                    /* PiL                             */
    0x00000000u,                    /* Zero                            */
    0x80000000u,                    /* SgnMask                         */
    0xffc00000u,                    /* NanMask                         */
    { 0x3dc4c6aeu, 0x3e2876b2u, 0x380561a3u }   /* ep_coeff[0..2]      */
};
/*
 * Scalar constants for __internal_sacos_la_cout, each given as the raw bit
 * pattern that initializes the first member of int_float.
 */

/* Polynomial coefficients, listed from the constant term (c0) up to c5.
 * c1..c5 carry the same bit patterns as poly_coeff[] in the table above. */
static __constant int_float __sacos_la_c0 = { 0x3f800000u };   /* 1.0f */
static __constant int_float __sacos_la_c1 = { 0x3e2aaaffu };
static __constant int_float __sacos_la_c2 = { 0x3d997c12u };
static __constant int_float __sacos_la_c3 = { 0x3d3a9ab4u };
static __constant int_float __sacos_la_c4 = { 0x3cc32a6bu };
static __constant int_float __sacos_la_c5 = { 0x3d2edc07u };

/* Tiny positive bias (2^-124) added under the square root. */
static __constant int_float __sacos_la_small_float = { 0x01800000u };

/* 2.0f; not referenced by the code visible in this file. */
static __constant int_float __sacos_la_two = { 0x40000000u };

/* pi/2 and pi: leading word (h) and a small correction word (l).
 * Only the h words are referenced by the code visible in this file. */
static __constant int_float __sacos_la_pi2h = { 0x3fc90fdbu };
static __constant int_float __sacos_la_pi2l = { 0xb33bbd2eu };
static __constant int_float __sacos_la_pih  = { 0x40490fdbu };
static __constant int_float __sacos_la_pil  = { 0xb3bbbd2eu };

__attribute__((always_inline))
inline int __internal_sacos_la_cout (float *pxin, float *pres)
{
    int nRet = 0;
    float xin = *pxin;
    int_float y, res;
    {
        int_float x, xa, RS, Shh2, High, R0;
        int sgn_x;
        float R, poly, Sh, Shh;

        x.f = xin;

        xa.w = x.w & 0x7fffffff;

        sgn_x = x.w ^ xa.w;

        y.f = SPIRV_OCL_BUILTIN(fma, _f32_f32_f32, ) (-(xa.f), 0.5f, 0.5f);

        R = xin * xin;
        R = SPIRV_OCL_BUILTIN(fmin, _f32_f32, ) (R, y.f);

        High.f = sgn_x ? __sacos_la_pih.f : 0.0f;
        High.f = (xa.f <= 0.5f) ? __sacos_la_pi2h.f : High.f;

        RS.f = 1.0f / SPIRV_OCL_BUILTIN(sqrt, _f32, ) ((y.f + __sacos_la_small_float.f));

        RS.w |= sgn_x;

        Sh = (y.f * RS.f);

        Shh2.f = -2.0f * Sh;

        poly = SPIRV_OCL_BUILTIN(fma, _f32_f32_f32, ) (__sacos_la_c5.f, R, __sacos_la_c4.f);
        poly = SPIRV_OCL_BUILTIN(fma, _f32_f32_f32, ) (poly, R, __sacos_la_c3.f);
        poly = SPIRV_OCL_BUILTIN(fma, _f32_f32_f32, ) (poly, R, __sacos_la_c2.f);
        poly = SPIRV_OCL_BUILTIN(fma, _f32_f32_f32, ) (poly, R, __sacos_la_c1.f);
        poly = SPIRV_OCL_BUILTIN(fma, _f32_f32_f32, ) (poly, R, __sacos_la_c0.f);

        R0.f = (xa.f <= 0.5f) ? x.f : Shh2.f;

        res.f = (SPIRV_OCL_BUILTIN(fma, _f32_f32_f32, ) (-(poly), R0.f, High.f));
    }

    *pres = res.f;
    nRet = (y.f >= 0.0f) ? 0 : 1;

    return nRet;
}

/*
 * Single-precision acos entry point.
 *
 * Forwards `a` to __internal_sacos_la_cout and returns the value that the
 * kernel stores through its output pointer.  The kernel's integer status
 * code is not examined.
 */
float __ocl_svml_acosf (float a)
{
    float arg = a;
    float result;

    __internal_sacos_la_cout (&arg, &result);

    return result;
}