File: neon.spec

package info (click to toggle)
rustc 1.48.0%2Bdfsg1-2
  • links: PTS, VCS
  • area: main
  • in suites: bullseye, bullseye-backports
  • size: 275,960 kB
  • sloc: xml: 147,652; ansic: 16,929; sh: 16,839; javascript: 6,817; python: 6,021; cpp: 4,663; makefile: 3,284; asm: 1,437; ruby: 68; awk: 12
file content (469 lines) | stat: -rw-r--r-- 14,425 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
// ARM Neon intrinsic specification.
// 
// This file contains the specification for a number of 
// intrinsics that allows us to generate them along with
// their test cases.
//
// To the syntax of the file - it's not very intelligently parsed!
//
// # Comments
// start with AT LEAST two, or four or more slashes  so // is a
// comment /////// is too.
//
// # Sections
// Sections start with EXACTLY three slashes followed
// by AT LEAST one space. Sections are used for two things:
//
// 1) they serve as the doc comment for the given intrinics.
// 2) they reset all variables (name, fn, etc.)
//
// # Variables
//
// name    - The prefix of the function, suffixes are auto
//           generated by the type they get passed.
//
// fn      - The function to call in rust-land.
//
// aarch64 - The intrinsic to check on aarch64 architecture.
//           If this is given but no arm intrinsic is provided,
//           the function will exclusively be generated for
//           aarch64.
//           This is used to generate both aarch64 specific and
//           shared intrinics by first only specifying th aarch64
//           variant then the arm variant.
// 
// arm     - The arm v7 intrinics used to checked for arm code
//           generation. All neon functions available in arm are
//           also available in aarch64. If no aarch64 intrinic was
//           set they are assumed to be the same.
//           Intrinics ending with a `.` will have a size suffixes
//           added (such as `i8` or `i64`) that is not sign specific
//           Intrinics ending with a `.s` will have a size suffixes
//           added (such as `s8` or `u64`) that is sign specific
//
// a       - First input for tests, it gets scaled to the size of
//           the type.
//
// b       - Second input for tests, it gets scaled to the size of
//           the type.
//
// # special values
//
// TRUE - 'true' all bits are set to 1
// FALSE - 'false' all bits are set to 0
// FF - same as 'true'
// MIN - minimal value (either 0 or the lowest negative number)
// MAX - maximal value propr to overflow
//
// # validate <values>
// Validates a and b aginst the expected result of the test.
// The special values 'TRUE' and 'FALSE' can be used to
// represent the corect NEON representation of true or
// false values. It too gets scaled to the type.
// 
// Validate needs to be called before generate as it sets
// up the rules for validation that get generated for each
// type.
// # generate <types>
// The generate command generates the intrinsics, it uses the
// Variables set and can be called multiple times while overwriting
// some of the variables.

/// Vector bitwise and
name = vand
fn = simd_and
arm = vand
aarch64 = and
a = 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x00
b = 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F
validate 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x00
b = 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
validate 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
generate int*_t, uint*_t, int64x*_t, uint64x*_t

/// Vector bitwise or (immediate, inclusive)
name = vorr
fn = simd_or
arm = vorr
aarch64 = orr
a = 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F
b = 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
validate 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F
generate int*_t, uint*_t, int64x*_t, uint64x*_t


/// Vector bitwise exclusive or (vector)
name = veor
fn = simd_xor
arm = veor
aarch64 = eor
a = 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F
b = 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
validate 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F
generate int*_t, uint*_t, int64x*_t, uint64x*_t

////////////////////
// equality
////////////////////

/// Compare bitwise Equal (vector)
name = vceq
fn = simd_eq
a = MIN, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, MAX
b = MIN, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, MAX
validate TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE
a = MIN, MIN, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0xCC, 0x0D, 0xEE, MAX
b = MIN, MAX, 0x02, 0x04, 0x04, 0x00, 0x06, 0x08, 0x08, 0x00, 0x0A, 0x0A, 0xCC, 0xD0, 0xEE, MIN
validate TRUE, FALSE, TRUE, FALSE, TRUE, FALSE, TRUE, FALSE, TRUE, FALSE, TRUE, FALSE, TRUE, FALSE, TRUE, FALSE

aarch64 = cmeq
generate uint64x*_t, int64x1_t:uint64x1_t, int64x2_t:uint64x2_t, poly64x1_t:uint64x1_t, poly64x2_t:uint64x2_t

arm = vceq.
generate uint*_t, int8x8_t:uint8x8_t, int8x16_t:uint8x16_t, int16x4_t:uint16x4_t, int16x8_t:uint16x8_t, int32x2_t:uint32x2_t, int32x4_t:uint32x4_t

/// Floating-point compare equal
name = vceq
fn = simd_eq
a = 1.2, 3.4, 5.6, 7.8
b = 1.2, 3.4, 5.6, 7.8
validate TRUE, TRUE, TRUE, TRUE, TRUE, TRUE

aarch64 = fcmeq
generate float64x1_t:uint64x1_t, float64x2_t:uint64x2_t

arm = vceq.
// we are missing float16x4_t:uint16x4_t, float16x8_t:uint16x8_t
generate float32x2_t:uint32x2_t, float32x4_t:uint32x4_t

////////////////////
// greater then
////////////////////

/// Compare signed greater than
name = vcgt
fn = simd_gt
a = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
b = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
validate TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE
aarch64 = cmgt
generate int64x1_t:uint64x1_t, int64x2_t:uint64x2_t

arm = vcgt.s
generate int8x8_t:uint8x8_t, int8x16_t:uint8x16_t, int16x4_t:uint16x4_t, int16x8_t:uint16x8_t, int32x2_t:uint32x2_t, int32x4_t:uint32x4_t

/// Compare unsigned highe
name = vcgt
fn = simd_gt
a = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
b = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
validate TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE

aarch64 = cmhi
generate uint64x*_t

arm = vcgt.s
generate uint*_t

/// Floating-point compare greater than
name = vcgt
fn = simd_gt
a = 1.2, 2.3, 3.4, 4.5, 5.6, 6.7, 7.8, 8.9 
b = 0.1, 1.2, 2.3, 3.4, 4.5, 5.6, 6.7, 7.8
validate TRUE, TRUE, TRUE, TRUE, TRUE, TRUE

aarch64 = fcmgt
generate float64x1_t:uint64x1_t, float64x2_t:uint64x2_t

arm = vcgt.s
// we are missing float16x4_t:uint16x4_t, float16x8_t:uint16x8_t
generate float32x2_t:uint32x2_t, float32x4_t:uint32x4_t

////////////////////
// lesser then
////////////////////

/// Compare signed less than
name = vclt
fn = simd_lt
a = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
b = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
validate TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE
aarch64 = cmgt
generate int64x1_t:uint64x1_t, int64x2_t:uint64x2_t

arm = vcgt.s
generate int8x8_t:uint8x8_t, int8x16_t:uint8x16_t, int16x4_t:uint16x4_t, int16x8_t:uint16x8_t, int32x2_t:uint32x2_t, int32x4_t:uint32x4_t

/// Compare unsigned less than
name = vclt
fn = simd_lt
a = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
b = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
validate TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE

aarch64 = cmhi
generate uint64x*_t

arm = vcgt.s
generate uint*_t

/// Floating-point compare less than
name = vclt
fn = simd_lt
a = 0.1, 1.2, 2.3, 3.4, 4.5, 5.6, 6.7, 7.8
b = 1.2, 2.3, 3.4, 4.5, 5.6, 6.7, 7.8, 8.9 
validate TRUE, TRUE, TRUE, TRUE, TRUE, TRUE

aarch64 = fcmgt
generate float64x1_t:uint64x1_t, float64x2_t:uint64x2_t

arm = vcgt.s
// we are missing float16x4_t:uint16x4_t, float16x8_t:uint16x8_t
generate float32x2_t:uint32x2_t, float32x4_t:uint32x4_t

////////////////////
// lesser then equals
////////////////////

/// Compare signed less than or equal
name = vcle
fn = simd_le
a = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
b = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
validate TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE

aarch64 = cmge
generate int64x1_t:uint64x1_t, int64x2_t:uint64x2_t

arm = vcge.s
generate int8x8_t:uint8x8_t, int8x16_t:uint8x16_t, int16x4_t:uint16x4_t, int16x8_t:uint16x8_t, int32x2_t:uint32x2_t, int32x4_t:uint32x4_t

/// Compare unsigned less than or equal
name = vcle
fn = simd_le
a = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
b = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
validate TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE

aarch64 = cmhs
generate uint64x*_t

arm = vcge.s
generate uint*_t

/// Floating-point compare less than or equal
name = vcle
fn = simd_le
a = 0.1, 1.2, 2.3, 3.4, 4.5, 5.6, 6.7, 7.8
b = 1.2, 2.3, 3.4, 4.5, 5.6, 6.7, 7.8, 8.9 
validate TRUE, TRUE, TRUE, TRUE, TRUE, TRUE
aarch64 = fcmge
generate float64x1_t:uint64x1_t, float64x2_t:uint64x2_t

// we are missing float16x4_t:uint16x4_t, float16x8_t:uint16x8_t
arm = vcge.s
generate float32x2_t:uint32x2_t, float32x4_t:uint32x4_t

////////////////////
// greater then equals
////////////////////

/// Compare signed greater than or equal
name = vcge
fn = simd_ge
a = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
b = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
validate TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE

aarch64 = cmge
generate int64x1_t:uint64x1_t, int64x2_t:uint64x2_t

arm = vcge.s
generate int8x8_t:uint8x8_t, int8x16_t:uint8x16_t, int16x4_t:uint16x4_t, int16x8_t:uint16x8_t, int32x2_t:uint32x2_t, int32x4_t:uint32x4_t

/// Compare unsigned greater than or equal
name = vcge
fn = simd_ge
a = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
b = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
validate TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE

aarch64 = cmhs
generate uint64x*_t

arm = vcge.s
generate uint*_t

/// Floating-point compare greater than or equal
name = vcge
fn = simd_ge
a = 1.2, 2.3, 3.4, 4.5, 5.6, 6.7, 7.8, 8.9 
b = 0.1, 1.2, 2.3, 3.4, 4.5, 5.6, 6.7, 7.8
validate TRUE, TRUE, TRUE, TRUE, TRUE, TRUE

aarch64 = fcmge
generate float64x1_t:uint64x1_t, float64x2_t:uint64x2_t

arm = vcge.s
// we are missing float16x4_t:uint16x4_t, float16x8_t:uint16x8_t
generate float32x2_t:uint32x2_t, float32x4_t:uint32x4_t

/// Saturating subtract
name = vqsub
a = 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42
b = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
validate 41, 40, 39, 38, 37, 36, 35, 34, 33, 32, 31, 30, 29, 28, 27, 26

arm = vqsub.s
aarch64 = uqsub
link-arm = vqsubu._EXT_
link-aarch64 = uqsub._EXT_
generate uint*_t

arm = vqsub.s
aarch64 = sqsub
link-arm = vqsubs._EXT_
link-aarch64 = sqsub._EXT_
generate int*_t

/// Halving add
name = vhadd
a = 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42
b = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
validate 21, 22, 22, 23, 23, 24, 24, 25, 25, 26, 26, 27, 27, 28, 28, 29


arm = vhadd.s
aarch64 = uhadd
link-aarch64 = uhadd._EXT_
link-arm = vhaddu._EXT_
generate uint*_t


arm = vhadd.s
aarch64 = shadd
link-aarch64 = shadd._EXT_
link-arm = vhadds._EXT_
generate int*_t

/// Rounding halving add
name = vrhadd
a = 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42
b = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
validate 22, 22, 23, 23, 24, 24, 25, 25, 26, 26, 27, 27, 28, 28, 29, 29

arm = vrhadd.s
aarch64 = urhadd
link-arm = vrhaddu._EXT_
link-aarch64 = urhadd._EXT_
generate uint*_t

arm = vrhadd.s
aarch64 = srhadd
link-arm = vrhadds._EXT_
link-aarch64 = srhadd._EXT_
generate int*_t

/// Saturating add
name = vqadd
a = 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42
b = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
validate 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58

arm = vqadd.s
aarch64 = uqadd
link-arm = vqaddu._EXT_
link-aarch64 = uqadd._EXT_
generate uint*_t

arm = vqadd.s
aarch64 = sqadd
link-arm = vqadds._EXT_
link-aarch64 = sqadd._EXT_
generate int*_t

// requires 1st and second argument to be different, this not implemented yet
// /// Signed saturating accumulate of unsigned value
// 
// name = vuqadd
// a = 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42
// b = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
// e = 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58

// it seems like we don't have those in rustland :( 
// aarch64 = suqadd 
// link-aarch64 = usqadd._EXT_
// generate int64x*_t

/ arm = suqadd
// link-arm = vuqadds._EXT_
// link-aarch64 = suqadd._EXT_
// generate int*_t


/// Multiply
name = vmul
a = 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2
b = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
validate 1, 4, 3, 8, 5, 12, 7, 16, 9, 20, 11, 24, 13, 28, 15, 32
arm = vmul.
aarch64 = mul
fn = simd_mul
generate int*_t, uint*_t

/// Multiply
name = vmul
fn = simd_mul
a = 1.0, 2.0, 1.0, 2.0
b = 2.0, 3.0, 4.0, 5.0
validate 2.0, 6.0, 4.0, 10.0

aarch64 = fmul
generate float64x*_t

arm = vmul.
generate float*_t


/// Subtract
name = vsub
a = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
b = 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2
validate 0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14
arm = vsub.
aarch64 = sub
fn = simd_sub
generate int*_t, uint*_t, int64x*_t, uint64x*_t

/// Subtract
name = vsub
fn = simd_sub
a = 1.0, 4.0, 3.0, 8.0
b = 1.0, 2.0, 3.0, 4.0
validate 0.0, 2.0, 0.0, 4.0

aarch64 = fsub
generate float64x*_t

arm = vsub.
generate float*_t


/// Signed halving subtract
name = vhsub
a = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
b = 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2
validate 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7

arm = vhsub.s
aarch64 = uhsub
link-arm = vhsubu._EXT_
link-aarch64 = uhsub._EXT_
generate uint*_t

arm = vhsub.s
aarch64 = shsub
link-arm = vhsubs._EXT_
link-aarch64 = shsub._EXT_
generate int*_t