1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133
|
/*========================== begin_copyright_notice ============================
Copyright (C) 2024 Intel Corporation
SPDX-License-Identifier: MIT
============================= end_copyright_notice ===========================*/
#include "../imf.h"
#pragma OPENCL FP_CONTRACT OFF
#pragma float_control(precise, on)
#pragma float_control(precise, on)
/*
 * Numerator coefficients b5..b0.
 *
 * Each constant is stored as a raw 32-bit pattern (member w) and read back as
 * an IEEE-754 single-precision value through member f.  In
 * __ocl_svml_internal_serf_ep they are consumed highest degree first (b5 down
 * to b0) by a Horner chain in dR2 = dR*dR, and the resulting polynomial is then
 * multiplied by dR.  Decimal values below are approximate decodings of the
 * bit patterns.
 */
/* b5 ~= 1.99e-6 */
static __constant union {
unsigned int w;
float f;
int i;
} __serf_ep___b5 = {0x3605524cu};
/* b4 ~= 2.85e-4 */
static __constant union {
unsigned int w;
float f;
int i;
} __serf_ep___b4 = {0x39953450u};
/* b3 ~= 3.88e-3 */
static __constant union {
unsigned int w;
float f;
int i;
} __serf_ep___b3 = {0x3b7e8d75u};
/* b2 ~= 5.31e-2 */
static __constant union {
unsigned int w;
float f;
int i;
} __serf_ep___b2 = {0x3d5983e4u};
/* b1 ~= 0.194 */
static __constant union {
unsigned int w;
float f;
int i;
} __serf_ep___b1 = {0x3e4635acu};
/* b0 ~= 1.12838 (close to 2/sqrt(pi)) */
static __constant union {
unsigned int w;
float f;
int i;
} __serf_ep___b0 = {0x3f906ebau};
/*
 * Denominator coefficients a5..a0.
 *
 * Same storage scheme as the b-coefficients: a raw 32-bit pattern in member w,
 * read as an IEEE-754 single-precision value through member f.  In
 * __ocl_svml_internal_serf_ep they are consumed highest degree first (a5 down
 * to a0) by a Horner chain in dR2 = dR*dR; the numerator is then multiplied by
 * the reciprocal of this polynomial.  Decimal values below are approximate
 * decodings of the bit patterns.
 */
/* a5 ~= 3.74e-5 */
static __constant union {
unsigned int w;
float f;
int i;
} __serf_ep___a5 = {0x381cf31fu};
/* a4 ~= 1.19e-3 */
static __constant union {
unsigned int w;
float f;
int i;
} __serf_ep___a4 = {0x3a9b6bd9u};
/* a3 ~= 1.52e-2 */
static __constant union {
unsigned int w;
float f;
int i;
} __serf_ep___a3 = {0x3c792ec0u};
/* a2 ~= 0.115 */
static __constant union {
unsigned int w;
float f;
int i;
} __serf_ep___a2 = {0x3dec40c3u};
/* a1 ~= 0.505 */
static __constant union {
unsigned int w;
float f;
int i;
} __serf_ep___a1 = {0x3f013f71u};
/* a0 == 1.0f exactly */
static __constant union {
unsigned int w;
float f;
int i;
} __serf_ep___a0 = {0x3f800000u};
/*
 * Scalar core of the single-precision "ep" erf routine.
 *
 * Evaluates an odd rational function of the input:
 *
 *     r = sign(x) * min(1, |x|' * B(|x|'^2) / A(|x|'^2)),   |x|' = min(|x|, 4)
 *
 * where B and A are the degree-5 polynomials whose coefficients are the
 * __serf_ep___b5..b0 and __serf_ep___a5..a0 constants.  Presumably this
 * approximates erf(x), as the function name suggests (b0 is close to
 * 2/sqrt(pi)).
 *
 * a    - pointer to the input value (read only, never written).
 * pres - pointer that receives the result.
 *
 * Returns 0 unconditionally; no error conditions are reported.
 *
 * NaN inputs produce x + x (with the sign bit of the input re-applied), so a
 * NaN propagates to the output.  Infinite inputs are clamped to magnitude 4
 * like any other large value and therefore yield a result of magnitude at
 * most 1.
 */
__attribute__((always_inline)) inline int
__ocl_svml_internal_serf_ep(float *a, float *pres) {
  /* Bit pattern of +Inf; any magnitude pattern above it encodes a NaN. */
  const unsigned int inf_bits = 0x7f800000u;

  union {
    unsigned int w;
    float f;
    int i;
  } xa, res;
  unsigned int sgn_x;
  float dR, dR2;
  float apoly, bpoly, rcp;

  /* Split the input into sign bit and magnitude. */
  xa.f = *a;
  sgn_x = xa.w & 0x80000000u;
  xa.w ^= sgn_x;

  /* Limit |x| to [0, 4]; a NaN fails the comparison and passes through. */
  dR = (xa.f > 4.0f) ? 4.0f : xa.f;
  dR2 = SPIRV_OCL_BUILTIN(fma, _f32_f32_f32, )(dR, dR, 0.0f);

  /* Explicitly keep the NaN payload in dR (fixup retained from the original). */
  dR = (xa.w > inf_bits) ? xa.f : dR;

  /*
   * Horner evaluation of numerator B and denominator A in dR2.  The two
   * chains are independent and are interleaved step by step.
   */
  bpoly = SPIRV_OCL_BUILTIN(fma, _f32_f32_f32, )(__serf_ep___b5.f, dR2,
                                                 __serf_ep___b4.f);
  apoly = SPIRV_OCL_BUILTIN(fma, _f32_f32_f32, )(__serf_ep___a5.f, dR2,
                                                 __serf_ep___a4.f);
  bpoly = SPIRV_OCL_BUILTIN(fma, _f32_f32_f32, )(bpoly, dR2, __serf_ep___b3.f);
  apoly = SPIRV_OCL_BUILTIN(fma, _f32_f32_f32, )(apoly, dR2, __serf_ep___a3.f);
  bpoly = SPIRV_OCL_BUILTIN(fma, _f32_f32_f32, )(bpoly, dR2, __serf_ep___b2.f);
  apoly = SPIRV_OCL_BUILTIN(fma, _f32_f32_f32, )(apoly, dR2, __serf_ep___a2.f);
  bpoly = SPIRV_OCL_BUILTIN(fma, _f32_f32_f32, )(bpoly, dR2, __serf_ep___b1.f);
  apoly = SPIRV_OCL_BUILTIN(fma, _f32_f32_f32, )(apoly, dR2, __serf_ep___a1.f);
  bpoly = SPIRV_OCL_BUILTIN(fma, _f32_f32_f32, )(bpoly, dR2, __serf_ep___b0.f);
  apoly = SPIRV_OCL_BUILTIN(fma, _f32_f32_f32, )(apoly, dR2, __serf_ep___a0.f);

  /* r = (|x| * B) * (1 / A); the fma-with-zero form of the original is kept. */
  rcp = 1.0f / apoly;
  bpoly = SPIRV_OCL_BUILTIN(fma, _f32_f32_f32, )(bpoly, dR, 0.0f);
  res.f = SPIRV_OCL_BUILTIN(fma, _f32_f32_f32, )(bpoly, rcp, 0.0f);

  /* The result magnitude never exceeds 1. */
  res.f = (res.f > 1.0f) ? 1.0f : res.f;

  /* Compiler workaround for NaNs: force NaN inputs to yield x + x. */
  res.f = (xa.w <= inf_bits) ? res.f : (xa.f + xa.f);

  /* Re-apply the sign of the input (the function is odd). */
  res.w ^= sgn_x;
  *pres = res.f;
  return 0;
}
/*
 * Public scalar entry point: forwards x to __ocl_svml_internal_serf_ep and
 * returns the value that routine stores.  The helper's integer status code is
 * ignored.
 */
float __ocl_svml_erff_ep(float x) {
  float result;

  (void)__ocl_svml_internal_serf_ep(&x, &result);
  return result;
}
|