File: atan_s_ep.cl

package info (click to toggle)
intel-graphics-compiler2 2.16.0-2
  • links: PTS, VCS
  • area: main
  • in suites: sid
  • size: 106,644 kB
  • sloc: cpp: 805,640; lisp: 287,672; ansic: 16,414; python: 3,952; yacc: 2,588; lex: 1,666; pascal: 313; sh: 186; makefile: 35
file content (78 lines) | stat: -rw-r--r-- 2,123 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
/*========================== begin_copyright_notice ============================

Copyright (C) 2024 Intel Corporation

SPDX-License-Identifier: MIT

============================= end_copyright_notice ===========================*/
#include "../imf.h"
#pragma OPENCL FP_CONTRACT OFF
// Polynomial coefficients for the low-accuracy ("ep") single-precision atan.
// Each constant is stored as a raw 32-bit pattern (member `w`) and read back
// as a float through member `f`, so the exact bit pattern is what matters.
// The routine below evaluates them in Horner form, highest degree first:
//   poly(R) = c0 + R*(c1 + R*(c2 + R*(c3 + R*c4)))
// and then forms R*poly(R) as the approximation on the reduced argument.
// Decimal values in the comments are approximate and for orientation only.

// Degree-4 coefficient, ~ -0.020144
static __constant union {
  unsigned int w;
  float f;
  int i;
} __satan_ep_c4 = {0xbca5054fu};
// Degree-3 coefficient, ~ +0.19633
static __constant union {
  unsigned int w;
  float f;
  int i;
} __satan_ep_c3 = {0x3e49099du};
// Degree-2 coefficient, ~ -0.39782
static __constant union {
  unsigned int w;
  float f;
  int i;
} __satan_ep_c2 = {0xbecbaf63u};
// Degree-1 coefficient, ~ +0.0073030
static __constant union {
  unsigned int w;
  float f;
  int i;
} __satan_ep_c1 = {0x3bef4e52u};
// Degree-0 coefficient, ~ +0.99987
static __constant union {
  unsigned int w;
  float f;
  int i;
} __satan_ep_c0 = {0x3f7ff759u};
// Branch-free, reduced-accuracy single-precision arctangent.
//
//   pxin : pointer to the input value (read once).
//   pres : pointer that receives the result.
//   returns 0 unconditionally.
//
// Method: work on |x|. When |x| > 1 the argument is replaced by 1/|x| and the
// result is assembled as -(atan(1/|x|) - pi/2); otherwise |x| is used directly
// with no offset. The sign of the input is re-applied at the very end. All
// selections are done with integer masks on the float bit patterns.
__attribute__((always_inline)) inline int
__ocl_svml_internal_satan_ep(float *pxin, float *pres) {
  union {
    unsigned int w;
    float f;
    int i;
  } arg, mag, recip, reduced, offset, out;
  int sign_in, gt_one, sign_out, delta;
  float r, acc;

  // Split the input into magnitude and sign bit.
  arg.f = *pxin;
  mag.w = arg.w & 0x7fffffffu;
  sign_in = arg.w ^ mag.w;

  // Reciprocal of the magnitude, candidate reduced argument for |x| > 1.
  recip.f = 1.0f / (mag.f);

  // Compare the two non-negative floats through their bit patterns:
  // delta < 0 exactly when 1/|x| < |x|, so gt_one is all-ones for |x| > 1
  // and zero otherwise.
  delta = recip.w - mag.w;
  gt_one = ((int)delta) >> 31;

  // Select the reduced argument: |x| + (bits(1/|x|) - bits(|x|)) yields the
  // bit pattern of 1/|x| when the mask is set, and |x| unchanged when clear.
  reduced.w = mag.w + (delta & gt_one);
  r = reduced.f;

  // For |x| > 1 the additive term is -pi/2 (0xbfc90FDB) and the final sign is
  // inverted, which turns atan(1/|x|) - pi/2 into pi/2 - atan(1/|x|).
  offset.w = gt_one & 0xbfc90FDB;
  sign_out = sign_in ^ (gt_one & 0x80000000u);

  // Horner evaluation of the degree-4 polynomial in r.
  acc = SPIRV_OCL_BUILTIN(fma, _f32_f32_f32, )(__satan_ep_c4.f, r,
                                               __satan_ep_c3.f);
  acc = SPIRV_OCL_BUILTIN(fma, _f32_f32_f32, )(acc, r, __satan_ep_c2.f);
  acc = SPIRV_OCL_BUILTIN(fma, _f32_f32_f32, )(acc, r, __satan_ep_c1.f);
  acc = SPIRV_OCL_BUILTIN(fma, _f32_f32_f32, )(acc, r, __satan_ep_c0.f);

  // r * poly(r) + offset, then apply the result sign by flipping the sign bit.
  out.f = SPIRV_OCL_BUILTIN(fma, _f32_f32_f32, )(acc, reduced.f, offset.f);
  out.w ^= sign_out;

  *pres = out.f;
  return 0;
}
// Scalar entry point: returns the reduced-accuracy arctangent of x as computed
// by __ocl_svml_internal_satan_ep. The helper's integer status is not
// inspected.
float __ocl_svml_atanf_ep(float x) {
  float result;
  (void)__ocl_svml_internal_satan_ep(&x, &result);
  return result;
}