File: atan_s_la.cl

package info (click to toggle)
intel-graphics-compiler 1.0.12504.6-1%2Bdeb12u1
  • links: PTS, VCS
  • area: main
  • in suites: bookworm
  • size: 83,912 kB
  • sloc: cpp: 910,147; lisp: 202,655; ansic: 15,197; python: 4,025; yacc: 2,241; lex: 1,570; pascal: 244; sh: 104; makefile: 25
file content (177 lines) | stat: -rw-r--r-- 6,289 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
/*========================== begin_copyright_notice ============================

Copyright (C) 2020-2021 Intel Corporation

SPDX-License-Identifier: MIT

============================= end_copyright_notice ===========================*/

#include "../imf.h"
#pragma OPENCL FP_CONTRACT OFF
typedef struct
{
    unsigned int AbsMask;
    unsigned int Shifter;
    unsigned int MaxThreshold;
    unsigned int MOne;
    unsigned int One;
    unsigned int LargeX;
    unsigned int Zero;
    unsigned int Tbl_H[32];
    unsigned int Tbl_L[32];
    unsigned int Pi2;
    unsigned int Pi2_low;
    unsigned int coeff[3];
} __internal_satan_la_data_avx512_t;
static __constant __internal_satan_la_data_avx512_t __internal_satan_la_data_avx512 = {

    0x7fffffffu, 0x4a000000u, 0x40F80000u, 0xbf800000u, 0x3f800000u, 0x4f800000u, 0x00000000u, {
                                                                                                0x00000000u, 0x3e7adbb0u,
                                                                                                0x3eed6338u, 0x3f24bc7du,
                                                                                                0x3f490fdbu, 0x3f6563e3u,
                                                                                                0x3f7b985fu, 0x3f869c79u,
                                                                                                0x3f8db70du, 0x3f93877bu,
                                                                                                0x3f985b6cu, 0x3f9c6b53u,
                                                                                                0x3f9fe0bbu, 0x3fa2daa4u,
                                                                                                0x3fa57088u, 0x3fa7b46fu,
                                                                                                0x3fa9b465u, 0x3fab7b7au,
                                                                                                0x3fad1283u, 0x3fae809eu,
                                                                                                0x3fafcb99u, 0x3fb0f836u,
                                                                                                0x3fb20a6au, 0x3fb30581u,
                                                                                                0x3fb3ec43u, 0x3fb4c10au,
                                                                                                0x3fb585d7u, 0x3fb63c64u,
                                                                                                0x3fb6e62cu, 0x3fb78478u,
                                                                                                0x3fb81868u, 0x3fb8a2f5u,
                                                                                                }
    , {
       0x00000000u, 0xb15a6fe4u,
       0x31ac376au, 0x31c9a7b8u,
       0xb2bbbd2eu, 0xb287b906u,
       0xb2d7e096u, 0x3345ba0au,
       0xb351441cu, 0xb325ac5eu,
       0xb2d2b64bu, 0x334e1335u,
       0x3337a856u, 0x325cd468u,
       0xb2669d97u, 0x33267261u,
       0xb32e1630u, 0xb345c196u,
       0xb35eeb1au, 0x32835b58u,
       0xb32dab7bu, 0x32d52571u,
       0x3281298fu, 0x334736a0u,
       0x326f266fu, 0xb2ac55f6u,
       0x33030c07u, 0xb190a736u,
       0xb2895340u, 0x32e86bfeu,
       0xb2d7f9cdu, 0x3342088au,
       }

    , 0x3fc90FDBu, 0xB33BBD2Eu, {

                                 0xbe0fa8deu, 0x3e4cc8e2u, 0xbeaaaaaau}
};

typedef struct
{
    unsigned int _sSIGN_MASK;
    unsigned int _sABS_MASK;
    unsigned int _sONE;
    unsigned int _sTWO;
    unsigned int _sPIO2;
    unsigned int _sRangeVal;

    unsigned int _sPC8;
    unsigned int _sPC7;
    unsigned int _sPC6;
    unsigned int _sPC5;
    unsigned int _sPC4;

    unsigned int _sPC3;
    unsigned int _sPC2;
    unsigned int _sPC1;
    unsigned int _sPC0;

} __internal_satan_la_data_t;
static __constant __internal_satan_la_data_t __internal_satan_la_data = {
    0x80000000u,
    0x7FFFFFFFu,
    0x3f800000u,
    0x40000000u,
    0x3FC90FDBu,
    0x7f800000u,

    0x3B322CC0u,
    0xBC7F2631u,
    0x3D2BC384u,
    0xBD987629u,
    0x3DD96474u,
    0xBE1161F8u,
    0x3E4CB79Fu,
    0xBEAAAA49u,
    0x3f800000u,
};
static __constant int_float __satan_la_c7 = { 0xbb9ca55fu };
static __constant int_float __satan_la_c6 = { 0x3cc92c07u };
static __constant int_float __satan_la_c5 = { 0xbd755ea6u };
static __constant int_float __satan_la_c4 = { 0x3dcba0b1u };
static __constant int_float __satan_la_c3 = { 0xbe0fa948u };
static __constant int_float __satan_la_c2 = { 0x3e4c81c3u };
static __constant int_float __satan_la_c1 = { 0xbeaaa90bu };
static __constant int_float __satan_la_c0 = { 0x3f7fffffu };

__attribute__((always_inline))
inline int __internal_satan_la_cout (float *pxin, float *pres)
{
    int nRet = 0;
    float xin = *pxin;
    {
        int_float x, xa, hcorr, ya, R0, sres;
        int sgn_x, smask, diff;
        float poly, R;

        x.f = xin;
        xa.w = x.w & 0x7fffffffu;
        sgn_x = x.w ^ xa.w;

        ya.f = 1.0f / (xa.f);
        ya.w ^= 0x80000000u;

        diff = xa.w - ya.w;
        smask = ((int) diff) >> 31;

        hcorr.w = smask & 0x3fc90FDB;

        R0.w = xa.w - (diff & smask);
        R = R0.f * R0.f;

        poly = SPIRV_OCL_BUILTIN(fma, _f32_f32_f32, ) (__satan_la_c7.f, R, __satan_la_c6.f);
        poly = SPIRV_OCL_BUILTIN(fma, _f32_f32_f32, ) (poly, R, __satan_la_c5.f);
        poly = SPIRV_OCL_BUILTIN(fma, _f32_f32_f32, ) (poly, R, __satan_la_c4.f);
        poly = SPIRV_OCL_BUILTIN(fma, _f32_f32_f32, ) (poly, R, __satan_la_c3.f);
        poly = SPIRV_OCL_BUILTIN(fma, _f32_f32_f32, ) (poly, R, __satan_la_c2.f);
        poly = SPIRV_OCL_BUILTIN(fma, _f32_f32_f32, ) (poly, R, __satan_la_c1.f);
        poly = SPIRV_OCL_BUILTIN(fma, _f32_f32_f32, ) (poly, R, __satan_la_c0.f);

        sres.f = SPIRV_OCL_BUILTIN(fma, _f32_f32_f32, ) (poly, R0.f, hcorr.f);

        sres.w = sres.w ^ sgn_x;
        *pres = sres.f;

    }
    return nRet;

}

float __ocl_svml_atanf (float a)
{

    float va1;
    float vr1;
    unsigned int vm;

    float r;

    va1 = a;;

    __internal_satan_la_cout (&va1, &vr1);
    r = vr1;;

    return r;

}