File: fp16_code_gen.ispc

package info (click to toggle)
ispc 1.28.2-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 97,620 kB
  • sloc: cpp: 77,067; python: 8,303; yacc: 3,337; lex: 1,126; ansic: 631; sh: 475; makefile: 17
file content (235 lines) | stat: -rw-r--r-- 4,979 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
// RUN: %{ispc} %s --target=avx512spr-x4 --emit-asm -o - | FileCheck %s -check-prefix=CHECK_SPR
// RUN: %{ispc} %s --target=avx512spr-x8 --emit-asm -o - | FileCheck %s -check-prefix=CHECK_SPR

// The -x16 target fails on half -> int16/uint16 conversions with LLVM 14. A backport of the fix is
// available for LLVM 15 and 16, and the fix is part of upstream LLVM 17.0, so x16 is disabled for now.
// %{ispc} %s --target=avx512spr-x16 --emit-asm -o - | FileCheck %s -check-prefix=CHECK_SPR

// RUN: %{ispc} %s --target=avx512spr-x32 --emit-asm -o - | FileCheck %s -check-prefix=CHECK_SPR
// RUN: %{ispc} %s --target=avx512spr-x64 --emit-asm -o - | FileCheck %s -check-prefix=CHECK_SPR

// REQUIRES: X86_ENABLED && !MACOS_HOST

// Adds two float16 values; the FileCheck directives below expect the
// generated assembly for this function to contain vaddph.
// CHECK_SPR-LABEL: fp16_add
// CHECK_SPR: vaddph
unmasked float16 fp16_add(float16 a, float16 b) {
  const float16 sum = a + b;
  return sum;
}

// Subtracts b from a in float16; the FileCheck directives below expect the
// generated assembly for this function to contain vsubph.
// CHECK_SPR-LABEL: fp16_sub
// CHECK_SPR: vsubph
unmasked float16 fp16_sub(float16 a, float16 b) {
  const float16 difference = a - b;
  return difference;
}

// Multiplies two float16 values; the FileCheck directives below expect the
// generated assembly for this function to contain vmulph.
// CHECK_SPR-LABEL: fp16_mul
// CHECK_SPR: vmulph
unmasked float16 fp16_mul(float16 a, float16 b) {
  const float16 product = a * b;
  return product;
}

// Divides a by b in float16; the FileCheck directives below expect the
// generated assembly for this function to contain vdivph.
// CHECK_SPR-LABEL: fp16_div
// CHECK_SPR: vdivph
unmasked float16 fp16_div(float16 a, float16 b) {
  const float16 quotient = a / b;
  return quotient;
}

// Calls sqrt() on a float16 value; the FileCheck directives below expect the
// generated assembly for this function to contain vsqrtph.
// CHECK_SPR-LABEL: fp16_sqrt
// CHECK_SPR: vsqrtph
unmasked float16 fp16_sqrt(float16 a) {
  const float16 root = sqrt(a);
  return root;
}

// Calls rsqrt() on a float16 value; the FileCheck directives below expect the
// generated assembly for this function to contain vrsqrtph.
// CHECK_SPR-LABEL: fp16_rsqrt
// CHECK_SPR: vrsqrtph
unmasked float16 fp16_rsqrt(float16 a) {
  const float16 inverseRoot = rsqrt(a);
  return inverseRoot;
}

// Calls rcp() on a float16 value; the FileCheck directives below expect the
// generated assembly for this function to contain vrcpph.
// CHECK_SPR-LABEL: fp16_rcp
// CHECK_SPR: vrcpph
unmasked float16 fp16_rcp(float16 a) {
  const float16 reciprocal = rcp(a);
  return reciprocal;
}

// Calls min() on two float16 values; the FileCheck directives below expect
// the generated assembly for this function to contain vminph.
// CHECK_SPR-LABEL: fp16_min
// CHECK_SPR: vminph
unmasked float16 fp16_min(float16 a, float16 b) {
  const float16 smaller = min(a, b);
  return smaller;
}

// Calls max() on two float16 values; the FileCheck directives below expect
// the generated assembly for this function to contain vmaxph.
// CHECK_SPR-LABEL: fp16_max
// CHECK_SPR: vmaxph
unmasked float16 fp16_max(float16 a, float16 b) {
  const float16 larger = max(a, b);
  return larger;
}

// Greater-than comparison of two float16 values. The FileCheck directives
// below accept either the "lt" or the "gt" form of the packed-half compare,
// so the operand order chosen by the compiler is not pinned.
// CHECK_SPR-LABEL: fp16_cmp
// CHECK_SPR: vcmp{{lt|gt}}ph
unmasked bool fp16_cmp(float16 a, float16 b) {
  return (a > b);
}

// Computes a*b + c in float16; the FileCheck directives below expect the
// multiply and add to be emitted as a single vfmadd213ph.
// The expression is kept as one statement so its shape matches what the
// directive was written against.
// CHECK_SPR-LABEL: fp16_fma
// CHECK_SPR: vfmadd213ph
unmasked float16 fp16_fma(float16 a, float16 b, float16 c) {
  return (a * b) + c;
}

// Computes (-a)*b + c in float16; the FileCheck directives below expect the
// negated multiply and add to be emitted as a single vfnmadd213ph.
// The expression is kept as one statement so its shape matches what the
// directive was written against.
// CHECK_SPR-LABEL: fp16_fnma
// CHECK_SPR: vfnmadd213ph
unmasked float16 fp16_fnma(float16 a, float16 b, float16 c) {
  return (-a * b) + c;
}

// Computes a*b - c in float16; the FileCheck directives below expect the
// multiply and subtract to be emitted as a single vfmsub213ph.
// The expression is kept as one statement so its shape matches what the
// directive was written against.
// CHECK_SPR-LABEL: fp16_fms
// CHECK_SPR: vfmsub213ph
unmasked float16 fp16_fms(float16 a, float16 b, float16 c) {
  return (a * b) - c;
}

// Computes (-a)*b - c in float16; the FileCheck directives below expect the
// negated multiply and subtract to be emitted as a single vfnmsub213ph.
// The expression is kept as one statement so its shape matches what the
// directive was written against.
// CHECK_SPR-LABEL: fp16_fnms
// CHECK_SPR: vfnmsub213ph
unmasked float16 fp16_fnms(float16 a, float16 b, float16 c) {
  return (-a * b) - c;
}

// Calls round() on a float16 value; the FileCheck directives below accept
// either vrndscalesh or vrndscaleph, with immediate operand $8.
// CHECK_SPR-LABEL: fp16_round
// CHECK_SPR: vrndscale{{sh|ph}} $8
unmasked float16 fp16_round(float16 a) {
  const float16 rounded = round(a);
  return rounded;
}

// Calls floor() on a float16 value; the FileCheck directives below expect
// vrndscaleph with immediate operand $9.
// CHECK_SPR-LABEL: fp16_floor
// CHECK_SPR: vrndscaleph $9
unmasked float16 fp16_floor(float16 a) {
  const float16 lowered = floor(a);
  return lowered;
}

// Calls ceil() on a float16 value; the FileCheck directives below expect
// vrndscaleph with immediate operand $10.
// CHECK_SPR-LABEL: fp16_ceil
// CHECK_SPR: vrndscaleph $10
unmasked float16 fp16_ceil(float16 a) {
  const float16 raised = ceil(a);
  return raised;
}

// Calls trunc() on a float16 value; the FileCheck directives below expect
// vrndscaleph with immediate operand $11.
// CHECK_SPR-LABEL: fp16_trunc
// CHECK_SPR: vrndscaleph $11
unmasked float16 fp16_trunc(float16 a) {
  const float16 truncated = trunc(a);
  return truncated;
}

///////////////////////////////////////////////
// Converts - floating point

// double

// Converts double to float16; the FileCheck directives below expect the
// generated assembly for this function to contain vcvtpd2ph.
// CHECK_SPR-LABEL: fp16_cvt_pd2ph
// CHECK_SPR: vcvtpd2ph
unmasked float16 fp16_cvt_pd2ph(double a) {
  return (float16)a;
}

// Converts float16 to double; the FileCheck directives below expect the
// generated assembly for this function to contain vcvtph2pd.
// CHECK_SPR-LABEL: fp16_cvt_ph2pd
// CHECK_SPR: vcvtph2pd
unmasked double fp16_cvt_ph2pd(float16 a) {
  return (double)a;
}

// float

// Converts float to float16; the FileCheck directives below expect the
// generated assembly for this function to contain vcvtps2ph.
// CHECK_SPR-LABEL: fp16_cvt_ps2ph
// CHECK_SPR: vcvtps2ph
unmasked float16 fp16_cvt_ps2ph(float a) {
  return (float16)a;
}

// Converts float16 to float; the FileCheck directives below expect the
// generated assembly for this function to contain vcvtph2ps.
// CHECK_SPR-LABEL: fp16_cvt_ph2ps
// CHECK_SPR: vcvtph2ps
unmasked float fp16_cvt_ph2ps(float16 a) {
  return (float)a;
}

// int64

// Converts int64 to float16; the FileCheck directives below expect the
// generated assembly for this function to contain vcvtqq2ph.
// CHECK_SPR-LABEL: fp16_cvt_qq2ph
// CHECK_SPR: vcvtqq2ph
unmasked float16 fp16_cvt_qq2ph(int64 a) {
  return (float16)a;
}

// Converts float16 to int64; the FileCheck directives below expect the
// generated assembly for this function to contain vcvttph2qq.
// CHECK_SPR-LABEL: fp16_cvt_ph2qq
// CHECK_SPR: vcvttph2qq
unmasked int64 fp16_cvt_ph2qq(float16 a) {
  return (int64)a;
}

// uint64

// Converts uint64 to float16; the FileCheck directives below expect the
// generated assembly for this function to contain vcvtuqq2ph.
// CHECK_SPR-LABEL: fp16_cvt_uqq2ph
// CHECK_SPR: vcvtuqq2ph
unmasked float16 fp16_cvt_uqq2ph(uint64 a) {
  return (float16)a;
}

// Converts float16 to uint64; the FileCheck directives below expect the
// generated assembly for this function to contain vcvttph2uqq.
// CHECK_SPR-LABEL: fp16_cvt_ph2uqq
// CHECK_SPR: vcvttph2uqq
unmasked uint64 fp16_cvt_ph2uqq(float16 a) {
  return (uint64)a;
}

// int32

// Converts int32 to float16; the FileCheck directives below expect the
// generated assembly for this function to contain vcvtdq2ph.
// CHECK_SPR-LABEL: fp16_cvt_dq2ph
// CHECK_SPR: vcvtdq2ph
unmasked float16 fp16_cvt_dq2ph(int32 a) {
  return (float16)a;
}

// Converts float16 to int; the FileCheck directives below expect the
// generated assembly for this function to contain vcvttph2dq.
// CHECK_SPR-LABEL: fp16_cvt_ph2dq
// CHECK_SPR: vcvttph2dq
unmasked int fp16_cvt_ph2dq(float16 a) {
  return (int)a;
}

// uint32

// Converts uint32 to float16; the FileCheck directives below expect the
// generated assembly for this function to contain vcvtudq2ph.
// CHECK_SPR-LABEL: fp16_cvt_udq2ph
// CHECK_SPR: vcvtudq2ph
unmasked float16 fp16_cvt_udq2ph(uint32 a) {
  return (float16)a;
}

// Converts float16 to uint32; the FileCheck directives below expect the
// generated assembly for this function to contain vcvttph2udq.
// CHECK_SPR-LABEL: fp16_cvt_ph2udq
// CHECK_SPR: vcvttph2udq
unmasked uint32 fp16_cvt_ph2udq(float16 a) {
  return (uint32)a;
}

// int16

// Converts int16 to float16; the FileCheck directives below expect the
// generated assembly for this function to contain vcvtw2ph.
// CHECK_SPR-LABEL: fp16_cvt_w2ph
// CHECK_SPR: vcvtw2ph
unmasked float16 fp16_cvt_w2ph(int16 a) {
  return (float16)a;
}

// Converts float16 to int16; the FileCheck directives below expect the
// generated assembly for this function to contain vcvttph2w.
// CHECK_SPR-LABEL: fp16_cvt_ph2w
// CHECK_SPR: vcvttph2w
unmasked int16 fp16_cvt_ph2w(float16 a) {
  return (int16)a;
}

// uint16

// Converts uint16 to float16; the FileCheck directives below expect the
// generated assembly for this function to contain vcvtuw2ph.
// CHECK_SPR-LABEL: fp16_cvt_uw2ph
// CHECK_SPR: vcvtuw2ph
unmasked float16 fp16_cvt_uw2ph(uint16 a) {
  return (float16)a;
}

// Converts float16 to uint16; the FileCheck directives below expect the
// generated assembly for this function to contain vcvttph2uw.
// CHECK_SPR-LABEL: fp16_cvt_ph2uw
// CHECK_SPR: vcvttph2uw
unmasked uint16 fp16_cvt_ph2uw(float16 a) {
  return (uint16)a;
}