1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78
|
/*========================== begin_copyright_notice ============================
Copyright (C) 2024 Intel Corporation
SPDX-License-Identifier: MIT
============================= end_copyright_notice ===========================*/
#include "../imf.h"
#pragma OPENCL FP_CONTRACT OFF
// Coefficient of R^4 in the polynomial evaluated by
// __ocl_svml_internal_satan_ep (first operand of its first fma).
// Initialised through the raw-bits member `w` and read back as `.f`;
// 0xbca5054f is approximately -2.0144e-2 when viewed as an IEEE-754 binary32.
static __constant union {
unsigned int w;
float f;
int i;
} __satan_ep_c4 = {0xbca5054fu};
// Coefficient of R^3 in the polynomial evaluated by
// __ocl_svml_internal_satan_ep.
// Initialised through the raw-bits member `w` and read back as `.f`;
// 0x3e49099d is approximately 1.9633e-1 when viewed as an IEEE-754 binary32.
static __constant union {
unsigned int w;
float f;
int i;
} __satan_ep_c3 = {0x3e49099du};
// Coefficient of R^2 in the polynomial evaluated by
// __ocl_svml_internal_satan_ep.
// Initialised through the raw-bits member `w` and read back as `.f`;
// 0xbecbaf63 is approximately -3.9782e-1 when viewed as an IEEE-754 binary32.
static __constant union {
unsigned int w;
float f;
int i;
} __satan_ep_c2 = {0xbecbaf63u};
// Coefficient of R^1 in the polynomial evaluated by
// __ocl_svml_internal_satan_ep.
// Initialised through the raw-bits member `w` and read back as `.f`;
// 0x3bef4e52 is approximately 7.3030e-3 when viewed as an IEEE-754 binary32.
static __constant union {
unsigned int w;
float f;
int i;
} __satan_ep_c1 = {0x3bef4e52u};
// Constant term of the polynomial evaluated by
// __ocl_svml_internal_satan_ep (addend of its last Horner fma).
// Initialised through the raw-bits member `w` and read back as `.f`;
// 0x3f7ff759 is approximately 9.9987e-1 when viewed as an IEEE-754 binary32.
static __constant union {
unsigned int w;
float f;
int i;
} __satan_ep_c0 = {0x3f7ff759u};
/*
 * Core of the reduced-accuracy ("ep") single-precision arctangent.
 *
 * pxin : pointer to the input value x (read once, before any store).
 * pres : pointer that receives the result.
 * Returns 0 unconditionally.
 *
 * Method: with a = |x| the reduced argument r is a when the |x| > 1 mask is
 * clear and 1/a when it is set. P(r) is the degree-4 polynomial with
 * coefficients __satan_ep_c0 .. __satan_ep_c4, evaluated by Horner's scheme.
 * The value r * P(r) is then combined with a -pi/2 offset and a sign flip in
 * the |x| > 1 case, giving pi/2 - atan(1/|x|), and finally the sign bit of x
 * is applied.
 */
__attribute__((always_inline)) inline int
__ocl_svml_internal_satan_ep(float *pxin, float *pres) {
  // Raw-bits / float / signed-int view of one 32-bit value.
  union {
    unsigned int w;
    float f;
    int i;
  } arg, mag, inv, offset, red, out;
  int in_sign, gt1_mask, out_sign, gap;
  float horner, r;

  arg.f = *pxin;

  // Separate magnitude and sign bit of the input.
  mag.w = arg.w & 0x7fffffffu;
  in_sign = arg.w ^ mag.w;

  // inv ~ 1/|x|
  inv.f = 1.0f / (mag.f);

  // gt1_mask = (|x| > 1.0) ? -1 : 0, derived from the sign of the difference
  // between the bit patterns of 1/|x| and |x| (arithmetic shift of the sign).
  gap = inv.w - mag.w;
  gt1_mask = ((int)gap) >> 31;

  // Reduced argument: |x| itself, or 1/|x| when the mask is set
  // (mag.w + gap reproduces inv.w).
  red.w = mag.w + (gap & gt1_mask);
  r = red.f;

  // When the mask is set, add -pi/2 (0xbfc90FDB) and flip the result sign,
  // which yields pi/2 - atan(1/|x|); otherwise both are no-ops.
  offset.w = gt1_mask & 0xbfc90FDB;
  out_sign = in_sign ^ (gt1_mask & 0x80000000u);

  // Horner evaluation: (((c4*r + c3)*r + c2)*r + c1)*r + c0
  horner = SPIRV_OCL_BUILTIN(fma, _f32_f32_f32, )(__satan_ep_c4.f, r,
                                                  __satan_ep_c3.f);
  horner = SPIRV_OCL_BUILTIN(fma, _f32_f32_f32, )(horner, r, __satan_ep_c2.f);
  horner = SPIRV_OCL_BUILTIN(fma, _f32_f32_f32, )(horner, r, __satan_ep_c1.f);
  horner = SPIRV_OCL_BUILTIN(fma, _f32_f32_f32, )(horner, r, __satan_ep_c0.f);

  // r * P(r) + offset, then apply the combined sign by XOR on the raw bits.
  out.f = SPIRV_OCL_BUILTIN(fma, _f32_f32_f32, )(horner, red.f, offset.f);
  out.w = out.w ^ out_sign;

  *pres = out.f;
  return 0;
}
/*
 * Scalar entry point for the reduced-accuracy single-precision arctangent.
 * Forwards x to __ocl_svml_internal_satan_ep and returns the value that
 * routine stores; its integer status code is ignored.
 */
float __ocl_svml_atanf_ep(float x) {
  float result;

  (void)__ocl_svml_internal_satan_ep(&x, &result);

  return result;
}
|