File: vstore_half.cl

package info (click to toggle)
swiftlang 6.0.3-2
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 2,519,992 kB
  • sloc: cpp: 9,107,863; ansic: 2,040,022; asm: 1,135,751; python: 296,500; objc: 82,456; f90: 60,502; lisp: 34,951; pascal: 19,946; sh: 18,133; perl: 7,482; ml: 4,937; javascript: 4,117; makefile: 3,840; awk: 3,535; xml: 914; fortran: 619; cs: 573; ruby: 573
file content (135 lines) | stat: -rw-r--r-- 5,637 bytes parent folder | download | duplicates (8)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
#include <clc/clc.h>

#pragma OPENCL EXTENSION cl_khr_byte_addressable_store : enable

/*
 * ROUND_VECn(out, in, ROUNDF)
 *
 * Applies the scalar function ROUNDF to each of the n components of `in` and
 * stores the results in the matching components of `out`. The 1-component
 * form is a plain assignment, the 3-component form addresses .s0/.s1/.s2, and
 * the 2/4/8/16-component forms recurse on the .lo and .hi halves.
 *
 * Every macro expands to exactly one `do { ... } while (0)` statement, so it
 * must be followed by a semicolon at the call site and stays a single
 * statement when used as the body of an unbraced if/else.
 */
#define ROUND_VEC1(out, in, ROUNDF)                                            \
  do {                                                                         \
    out = ROUNDF(in);                                                          \
  } while (0)
#define ROUND_VEC2(out, in, ROUNDF)                                            \
  do {                                                                         \
    ROUND_VEC1(out.lo, in.lo, ROUNDF);                                         \
    ROUND_VEC1(out.hi, in.hi, ROUNDF);                                         \
  } while (0)
#define ROUND_VEC3(out, in, ROUNDF)                                            \
  do {                                                                         \
    ROUND_VEC1(out.s0, in.s0, ROUNDF);                                         \
    ROUND_VEC1(out.s1, in.s1, ROUNDF);                                         \
    ROUND_VEC1(out.s2, in.s2, ROUNDF);                                         \
  } while (0)
#define ROUND_VEC4(out, in, ROUNDF)                                            \
  do {                                                                         \
    ROUND_VEC2(out.lo, in.lo, ROUNDF);                                         \
    ROUND_VEC2(out.hi, in.hi, ROUNDF);                                         \
  } while (0)
#define ROUND_VEC8(out, in, ROUNDF)                                            \
  do {                                                                         \
    ROUND_VEC4(out.lo, in.lo, ROUNDF);                                         \
    ROUND_VEC4(out.hi, in.hi, ROUNDF);                                         \
  } while (0)
#define ROUND_VEC16(out, in, ROUNDF)                                           \
  do {                                                                         \
    ROUND_VEC8(out.lo, in.lo, ROUNDF);                                         \
    ROUND_VEC8(out.hi, in.hi, ROUNDF);                                         \
  } while (0)

/*
 * __FUNC(SUFFIX, VEC_SIZE, TYPE, AS, ROUNDF)
 *
 * Defines the overloads vstore_half<SUFFIX> and vstorea_half<SUFFIX> taking
 * (TYPE vec, size_t offset, AS half *mem). Each one passes every component of
 * vec through ROUNDF using ROUND_VEC<VEC_SIZE> and hands the pre-rounded value
 * to vstore_half_<VEC_SIZE> / vstorea_half_<VEC_SIZE>. Those two helpers are
 * declared, not defined, by this macro.
 */
#define __FUNC(SUFFIX, VEC_SIZE, TYPE, AS, ROUNDF)                             \
  void _CLC_OVERLOAD vstore_half_##VEC_SIZE(TYPE, size_t, AS half *);          \
  void _CLC_OVERLOAD vstorea_half_##VEC_SIZE(TYPE, size_t, AS half *);         \
  _CLC_OVERLOAD _CLC_DEF void vstore_half##SUFFIX(TYPE vec, size_t offset,     \
                                                  AS half *mem) {              \
    TYPE staged;                                                               \
    ROUND_VEC##VEC_SIZE(staged, vec, ROUNDF);                                  \
    vstore_half_##VEC_SIZE(staged, offset, mem);                               \
  }                                                                            \
  _CLC_OVERLOAD _CLC_DEF void vstorea_half##SUFFIX(TYPE vec, size_t offset,    \
                                                   AS half *mem) {             \
    TYPE staged;                                                               \
    ROUND_VEC##VEC_SIZE(staged, vec, ROUNDF);                                  \
    vstorea_half_##VEC_SIZE(staged, offset, mem);                              \
  }

/*
 * Pre-rounds a float toward zero so that it is exactly representable as a
 * half: NaN and infinities pass through unchanged, finite values whose
 * magnitude exceeds 65504 clamp to +-65504, values with an unbiased exponent
 * below -24 become a signed zero, and everything else is truncated by
 * clearing low mantissa bits.
 */
_CLC_DEF _CLC_OVERLOAD float __clc_rtz(float x) {
  /* NaN is returned untouched. */
  if (isnan(x)) {
    return x;
  }
  /* Rounding toward zero never overflows to infinity: clamp to the largest
   * finite half instead. A real infinity falls through and is preserved. */
  if (!isinf(x) && fabs(x) > 65504.0f) {
    return copysign(65504.0f, x);
  }

  const uint bits = as_uint(x);
  const int unbiased_exp = (int)((bits >> 23) & 0xffu) - 127;

  /* Magnitudes below the smallest half denormal truncate to a signed zero. */
  if (unbiased_exp < -24) {
    return copysign(0.0f, x);
  }

  /* A normal half keeps 10 of the 23 float mantissa bits, so 13 bits go. */
  int discard = 13;
  /* In the half-denormal range one more bit is lost per exponent step below
   * -14, up to 10 extra bits. */
  if (unbiased_exp < -14) {
    discard += min(-14 - unbiased_exp, 10);
  }

  const uint keep_mask = 0xffffffffu << discard;
  return as_float(bits & keep_mask);
}

/*
 * Pre-rounds a float away from zero so that it is exactly representable as a
 * half: NaN passes through, magnitudes above 65504 become a signed infinity,
 * zeros keep their sign, non-zero values with an unbiased exponent below -24
 * become the signed smallest half denormal (2^-24), and any other value that
 * is not already representable moves to the next representable value in the
 * direction of its own sign.
 */
_CLC_DEF _CLC_OVERLOAD float __clc_rti(float x) {
  /* NaN is returned untouched. */
  if (isnan(x)) {
    return x;
  }

  const uint bits = as_uint(x);
  const uint magnitude = bits & 0x7fffffffu;
  const float signed_inf = copysign(INFINITY, x);

  /* Anything beyond the largest finite half (0x1.ffcp+15 == 65504) rounds
   * to infinity; this also covers an infinite input. */
  if (as_float(magnitude) > 0x1.ffcp+15f) {
    return signed_inf;
  }
  /* +-0 stays +-0. */
  if (magnitude == 0) {
    return copysign(0.0f, x);
  }

  const int unbiased_exp = (int)((bits >> 23) & 0xffu) - 127;
  /* Non-zero values below the smallest half denormal round up to it. */
  if (unbiased_exp < -24) {
    return copysign(0x1.0p-24f, x);
  }

  /* A normal half drops the low 13 mantissa bits; in the half-denormal range
   * one more bit is dropped per exponent step below -14, up to 10 extra. */
  int discard = 13;
  if (unbiased_exp < -14) {
    discard += min(-14 - unbiased_exp, 10);
  }
  const uint low_mask = (1u << discard) - 1u;

  /* Already representable: nothing to round. */
  if ((bits & low_mask) == 0) {
    return x;
  }
  /* Fill the dropped bits with ones, then step one float ulp away from zero
   * to land on the next representable value. */
  return nextafter(as_float(bits | low_mask), signed_inf);
}
/*
 * Pre-rounds toward negative infinity: inputs with the sign bit set are
 * rounded away from zero, all others toward zero.
 */
_CLC_DEF _CLC_OVERLOAD float __clc_rtn(float x) {
  const bool sign_bit_set = (as_uint(x) >> 31) != 0;
  if (sign_bit_set) {
    return __clc_rti(x);
  }
  return __clc_rtz(x);
}
/*
 * Pre-rounds toward positive infinity: inputs with the sign bit set are
 * rounded toward zero, all others away from zero.
 */
_CLC_DEF _CLC_OVERLOAD float __clc_rtp(float x) {
  const bool sign_bit_set = (as_uint(x) >> 31) != 0;
  if (sign_bit_set) {
    return __clc_rtz(x);
  }
  return __clc_rti(x);
}
/*
 * Pre-rounds to nearest, ties to even. The function only decides the
 * direction: it inspects the mantissa bits that will be dropped and then
 * delegates to __clc_rti (round away from zero) or __clc_rtz (round toward
 * zero).
 */
_CLC_DEF _CLC_OVERLOAD float __clc_rte(float x) {
  const uint bits = as_uint(x);
  const int unbiased_exp = (int)((bits >> 23) & 0xffu) - 127;
  /* 23 stored mantissa bits plus the implicit leading one. */
  const uint significand = (bits & 0x7fffffu) | 0x800000u;

  /* 13 low bits are dropped by default; below exponent -14 one more bit is
   * dropped per exponent step. The extra amount is capped at 15: beyond that
   * the kept bit and the dropped-bit comparison can no longer change. */
  int discard = 13;
  if (unbiased_exp < -14) {
    discard += min(-14 - unbiased_exp, 15);
  }

  const uint half_ulp = 1u << (discard - 1);
  const uint dropped = significand & ((1u << discard) - 1u);
  const bool kept_lsb_set = (significand & (1u << discard)) != 0;

  /* Exponents above 15 are sent to __clc_rti (original intent: round to
   * infinity). */
  if (unbiased_exp > 15) {
    return __clc_rti(x);
  }
  /* More than half an ulp is dropped: round away from zero. */
  if (dropped > half_ulp) {
    return __clc_rti(x);
  }
  /* Exact tie: round away only when the lowest kept bit is set, so the
   * result ends up even. */
  if (dropped == half_ulp && kept_lsb_set) {
    return __clc_rti(x);
  }
  return __clc_rtz(x);
}

/*
 * __XFUNC instantiates __FUNC five times for one (SUFFIX, VEC_SIZE, TYPE, AS)
 * combination: the unsuffixed name and the explicit _rte name both round with
 * __clc_rte, while the _rtz, _rtn and _rtp names use the helper of the same
 * name.
 */
#define __XFUNC(SUFFIX, VEC_SIZE, TYPE, AS)                                    \
  __FUNC(SUFFIX, VEC_SIZE, TYPE, AS, __clc_rte)                                \
  __FUNC(SUFFIX##_rtz, VEC_SIZE, TYPE, AS, __clc_rtz)                          \
  __FUNC(SUFFIX##_rtn, VEC_SIZE, TYPE, AS, __clc_rtn)                          \
  __FUNC(SUFFIX##_rtp, VEC_SIZE, TYPE, AS, __clc_rtp)                          \
  __FUNC(SUFFIX##_rte, VEC_SIZE, TYPE, AS, __clc_rte)

/*
 * FUNC only forwards its arguments to __XFUNC.
 * NOTE(review): the extra level presumably exists so that macro arguments are
 * fully expanded before __XFUNC pastes SUFFIX with ## -- confirm against the
 * invocations in vstore_half.inc.
 */
#define FUNC(SUFFIX, VEC_SIZE, TYPE, AS) __XFUNC(SUFFIX, VEC_SIZE, TYPE, AS)

/*
 * Select vstore_half.inc as the body for clc/math/gentype.inc, then undefine
 * the body selector and the FUNC/__XFUNC/__FUNC generator macros once the
 * include is done.
 * NOTE(review): gentype.inc presumably includes __CLC_BODY once per generic
 * floating-point type, with the body invoking FUNC -- confirm against those
 * files.
 */
#define __CLC_BODY "vstore_half.inc"
#include <clc/math/gentype.inc>
#undef __CLC_BODY
#undef FUNC
#undef __XFUNC
#undef __FUNC