File: erf_s_ep.cl

package info (click to toggle)
intel-graphics-compiler2 2.16.0-2
  • links: PTS, VCS
  • area: main
  • in suites: sid
  • size: 106,644 kB
  • sloc: cpp: 805,640; lisp: 287,672; ansic: 16,414; python: 3,952; yacc: 2,588; lex: 1,666; pascal: 313; sh: 186; makefile: 35
file content (133 lines) | stat: -rw-r--r-- 3,599 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
/*========================== begin_copyright_notice ============================

Copyright (C) 2024 Intel Corporation

SPDX-License-Identifier: MIT

============================= end_copyright_notice ===========================*/
#include "../imf.h"
#pragma OPENCL FP_CONTRACT OFF
#pragma float_control(precise, on)
#pragma float_control(precise, on)
/*
 * Coefficient tables for the rational approximation evaluated in
 * __ocl_svml_internal_serf_ep.  Each constant is a union initialised through
 * its first member (`w`, the raw 32-bit pattern) and read back through `f`,
 * so the float value is fixed bit-for-bit.  The decimal values quoted below
 * are approximate decodings of those bit patterns.
 *
 * __serf_ep___b5..b0: numerator polynomial in x^2 (highest degree first).
 */
/* b5 ≈ 1.987e-6 */
static __constant union {
  unsigned int w;
  float f;
  int i;
} __serf_ep___b5 = {0x3605524cu};
/* b4 ≈ 2.846e-4 */
static __constant union {
  unsigned int w;
  float f;
  int i;
} __serf_ep___b4 = {0x39953450u};
/* b3 ≈ 3.884e-3 */
static __constant union {
  unsigned int w;
  float f;
  int i;
} __serf_ep___b3 = {0x3b7e8d75u};
/* b2 ≈ 5.310e-2 */
static __constant union {
  unsigned int w;
  float f;
  int i;
} __serf_ep___b2 = {0x3d5983e4u};
/* b1 ≈ 0.1936 */
static __constant union {
  unsigned int w;
  float f;
  int i;
} __serf_ep___b1 = {0x3e4635acu};
/* b0 ≈ 1.1284 (close to 2/sqrt(pi), the leading erf series coefficient) */
static __constant union {
  unsigned int w;
  float f;
  int i;
} __serf_ep___b0 = {0x3f906ebau};
/* __serf_ep___a5..a0: denominator polynomial in x^2 (highest degree first). */
/* a5 ≈ 3.742e-5 */
static __constant union {
  unsigned int w;
  float f;
  int i;
} __serf_ep___a5 = {0x381cf31fu};
/* a4 ≈ 1.186e-3 */
static __constant union {
  unsigned int w;
  float f;
  int i;
} __serf_ep___a4 = {0x3a9b6bd9u};
/* a3 ≈ 1.521e-2 */
static __constant union {
  unsigned int w;
  float f;
  int i;
} __serf_ep___a3 = {0x3c792ec0u};
/* a2 ≈ 0.1154 */
static __constant union {
  unsigned int w;
  float f;
  int i;
} __serf_ep___a2 = {0x3dec40c3u};
/* a1 ≈ 0.5049 */
static __constant union {
  unsigned int w;
  float f;
  int i;
} __serf_ep___a1 = {0x3f013f71u};
/* a0 = 1.0 exactly */
static __constant union {
  unsigned int w;
  float f;
  int i;
} __serf_ep___a0 = {0x3f800000u};
/*
 * Single-precision erf kernel (the "ep" variant, going by the file and
 * symbol names).
 *
 * Evaluates  sign(x) * min(1, r * B(r^2) / A(r^2))  with r = min(|x|, 4),
 * where B and A are the degree-5 polynomials in r^2 built from the
 * __serf_ep___b* and __serf_ep___a* tables.
 *
 * Parameters:
 *   a    - pointer to the input value; read once, never written.
 *   pres - pointer that receives the result.
 *
 * Returns: always 0.
 *
 * Special cases visible in the code:
 *   - NaN input (|x| bits above 0x7f800000) yields x + x, i.e. a NaN.
 *   - The sign bit of the input is XOR-ed back onto the result, so the
 *     result carries the sign of x (including for zero inputs).
 *
 * NOTE(review): SPIRV_OCL_BUILTIN(fma, _f32_f32_f32, ) is defined outside
 * this file; it is presumably the fused multiply-add builtin. The fma calls
 * with a 0.0f addend act as plain multiplications in that case.
 */
__attribute__((always_inline)) inline int
__ocl_svml_internal_serf_ep(float *a, float *pres) {
  int nRet = 0;
  float xin = *a;
  union {
    unsigned int w;
    float f;
    int i;
  } xa, res;
  unsigned int sgn_x;
  float dR, dR2;
  union {
    unsigned int w;
    float f;
    int i;
  } apoly, bpoly, Y;
  xa.f = xin;
  // isolate the sign bit so it can be restored on the result
  sgn_x = xa.w & 0x80000000;
  // |xin|
  xa.w ^= sgn_x;
  // limit |x| range to [0,4]
  dR = (xa.f > 4.0f) ? 4.0f : xa.f;
  dR2 = SPIRV_OCL_BUILTIN(fma, _f32_f32_f32, )(dR, dR, 0.0f);
  // fixup for NaNs: integer compare on the bit pattern keeps a NaN in dR
  dR = (xa.w > 0x7f800000u) ? xa.f : dR;
  // polynomial evaluation: Horner steps in dR2 for numerator (b) and
  // denominator (a), interleaved
  bpoly.f = SPIRV_OCL_BUILTIN(fma, _f32_f32_f32, )(__serf_ep___b5.f, dR2,
                                                   __serf_ep___b4.f);
  apoly.f = SPIRV_OCL_BUILTIN(fma, _f32_f32_f32, )(__serf_ep___a5.f, dR2,
                                                   __serf_ep___a4.f);
  bpoly.f =
      SPIRV_OCL_BUILTIN(fma, _f32_f32_f32, )(bpoly.f, dR2, __serf_ep___b3.f);
  apoly.f =
      SPIRV_OCL_BUILTIN(fma, _f32_f32_f32, )(apoly.f, dR2, __serf_ep___a3.f);
  bpoly.f =
      SPIRV_OCL_BUILTIN(fma, _f32_f32_f32, )(bpoly.f, dR2, __serf_ep___b2.f);
  apoly.f =
      SPIRV_OCL_BUILTIN(fma, _f32_f32_f32, )(apoly.f, dR2, __serf_ep___a2.f);
  bpoly.f =
      SPIRV_OCL_BUILTIN(fma, _f32_f32_f32, )(bpoly.f, dR2, __serf_ep___b1.f);
  apoly.f =
      SPIRV_OCL_BUILTIN(fma, _f32_f32_f32, )(apoly.f, dR2, __serf_ep___a1.f);
  bpoly.f =
      SPIRV_OCL_BUILTIN(fma, _f32_f32_f32, )(bpoly.f, dR2, __serf_ep___b0.f);
  apoly.f =
      SPIRV_OCL_BUILTIN(fma, _f32_f32_f32, )(apoly.f, dR2, __serf_ep___a0.f);
  // reciprocal of the denominator, then numerator * |x| * (1 / denominator)
  Y.f = 1.0f / apoly.f;
  bpoly.f = SPIRV_OCL_BUILTIN(fma, _f32_f32_f32, )(bpoly.f, dR, 0.0f);
  res.f = SPIRV_OCL_BUILTIN(fma, _f32_f32_f32, )(bpoly.f, Y.f, 0.0f);
  // clamp the magnitude to 1
  res.f = (res.f > 1.0f) ? 1.0f : res.f;
  // compiler workaround for NaNs
  res.f = (xa.w <= 0x7f800000u) ? res.f : (xa.f + xa.f);
  // restore the input's sign
  res.w ^= sgn_x;
  *pres = res.f;
  return nRet;
}
/*
 * By-value entry point for the single-precision erf "ep" routine.
 * Forwards x to __ocl_svml_internal_serf_ep through pointers and returns the
 * value that routine stores; its integer status code is ignored.
 */
float __ocl_svml_erff_ep(float x) {
  float result;

  (void)__ocl_svml_internal_serf_ep(&x, &result);

  return result;
}