File: unaligned-param-load-store.ll

package info (click to toggle)
llvm-toolchain-21 1%3A21.1.6-2
  • links: PTS, VCS
  • area: main
  • in suites: forky
  • size: 2,245,044 kB
  • sloc: cpp: 7,619,726; ansic: 1,434,018; asm: 1,058,748; python: 252,740; f90: 94,671; objc: 70,685; lisp: 42,813; pascal: 18,401; sh: 8,601; ml: 5,111; perl: 4,720; makefile: 3,666; awk: 3,523; javascript: 2,409; xml: 892; fortran: 770
file content (508 lines) | stat: -rw-r--r-- 22,500 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; Verifies correctness of load/store of parameters and return values.
; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_35 -O0 -verify-machineinstrs | FileCheck -allow-deprecated-dag-overlap %s
; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_35 -O0 -verify-machineinstrs | %ptxas-verify %}

; Parameter/return types used by the tests below. Each one wraps a packed
; struct <{ T, i8, T }> together with a trailing i64. Packed structs have no
; padding between members, so the i8 pushes the second T to offset
; sizeof(T) + 1, where it is not naturally aligned. The trailing i64 gives the
; outer struct 8-byte alignment; in the expected PTX it sits at offset 8, 16
; and 24 for 2-, 4- and 8-byte T respectively.
%s_i8i16p = type { <{ i16, i8, i16 }>, i64 }
%s_i8i32p = type { <{ i32, i8, i32 }>, i64 }
%s_i8i64p = type { <{ i64, i8, i64 }>, i64 }
%s_i8f16p = type { <{ half, i8, half }>, i64 }
%s_i8f16x2p = type { <{ <2 x half>, i8, <2 x half> }>, i64 }
%s_i8f32p = type { <{ float, i8, float }>, i64 }
%s_i8f64p = type { <{ double, i8, double }>, i64 }

; -- All loads/stores from parameters aligned by one must be done one
;    byte at a time.
; -- Notes:
;   -- There are two fields of interest in the packed part of the struct, one
;      with a proper offset and one without. The former should be loaded or
;      stored as a whole, and the latter by bytes.
;   -- The loads and stores of these two fields are the point of the tests
;      below; the autogenerated assertions nevertheless match each function's
;      entire PTX body.


; i16 variant. The function passes its %s_i8i16p argument to a call to itself
; and returns the result, so the expected PTX covers four things: incoming
; parameter loads, outgoing argument stores, loads of the callee's return
; value, and stores to this function's own return value.
; Expected layout (16 bytes): the i16 at offset 0 is accessed as a single b16,
; the i8 at offset 2 as a b8, and the second i16 (bytes 3-4) one b8 at a time
; (reassembled with shl/or on the incoming side). The trailing i64 is at
; offset 8 and is accessed as a single b64.
define %s_i8i16p @test_s_i8i16p(%s_i8i16p %a) {
; CHECK-LABEL: test_s_i8i16p(
; CHECK:       {
; CHECK-NEXT:    .reg .b16 %rs<15>;
; CHECK-NEXT:    .reg .b64 %rd<4>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.b8 %rs4, [test_s_i8i16p_param_0+4];
; CHECK-NEXT:    shl.b16 %rs5, %rs4, 8;
; CHECK-NEXT:    ld.param.b8 %rs6, [test_s_i8i16p_param_0+3];
; CHECK-NEXT:    or.b16 %rs3, %rs5, %rs6;
; CHECK-NEXT:    ld.param.b64 %rd1, [test_s_i8i16p_param_0+8];
; CHECK-NEXT:    ld.param.b8 %rs2, [test_s_i8i16p_param_0+2];
; CHECK-NEXT:    ld.param.b16 %rs1, [test_s_i8i16p_param_0];
; CHECK-NEXT:    { // callseq 0, 0
; CHECK-NEXT:    .param .align 8 .b8 param0[16];
; CHECK-NEXT:    st.param.b16 [param0], %rs1;
; CHECK-NEXT:    st.param.b8 [param0+2], %rs2;
; CHECK-NEXT:    st.param.b8 [param0+3], %rs3;
; CHECK-NEXT:    st.param.b8 [param0+4], %rs4;
; CHECK-NEXT:    st.param.b64 [param0+8], %rd1;
; CHECK-NEXT:    .param .align 8 .b8 retval0[16];
; CHECK-NEXT:    call.uni (retval0), test_s_i8i16p, (param0);
; CHECK-NEXT:    ld.param.b16 %rs7, [retval0];
; CHECK-NEXT:    ld.param.b8 %rs8, [retval0+2];
; CHECK-NEXT:    ld.param.b8 %rs9, [retval0+3];
; CHECK-NEXT:    ld.param.b8 %rs10, [retval0+4];
; CHECK-NEXT:    ld.param.b64 %rd2, [retval0+8];
; CHECK-NEXT:    } // callseq 0
; CHECK-NEXT:    st.param.b16 [func_retval0], %rs7;
; CHECK-NEXT:    st.param.b8 [func_retval0+2], %rs8;
; CHECK-NEXT:    st.param.b8 [func_retval0+4], %rs10;
; CHECK-NEXT:    st.param.b8 [func_retval0+3], %rs9;
; CHECK-NEXT:    st.param.b64 [func_retval0+8], %rd2;
; CHECK-NEXT:    ret;
  %r = tail call %s_i8i16p @test_s_i8i16p(%s_i8i16p %a)
  ret %s_i8i16p %r
}


; i32 variant. The function passes its %s_i8i32p argument to a call to itself
; and returns the result, so the expected PTX covers incoming parameter loads,
; outgoing argument stores, loads of the callee's return value, and stores to
; this function's own return value.
; Expected layout (24 bytes): the i32 at offset 0 is accessed as a single b32,
; the i8 at offset 4 as a b8, and the second i32 (bytes 5-8) as four b8
; accesses (reassembled with shl/or on the incoming side and split again with
; shr before the outgoing stores). The trailing i64 is at offset 16.
define %s_i8i32p @test_s_i8i32p(%s_i8i32p %a) {
; CHECK-LABEL: test_s_i8i32p(
; CHECK:       {
; CHECK-NEXT:    .reg .b16 %rs<12>;
; CHECK-NEXT:    .reg .b32 %r<20>;
; CHECK-NEXT:    .reg .b64 %rd<4>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.b8 %r3, [test_s_i8i32p_param_0+6];
; CHECK-NEXT:    shl.b32 %r4, %r3, 8;
; CHECK-NEXT:    ld.param.b8 %r5, [test_s_i8i32p_param_0+5];
; CHECK-NEXT:    or.b32 %r6, %r4, %r5;
; CHECK-NEXT:    ld.param.b8 %r7, [test_s_i8i32p_param_0+7];
; CHECK-NEXT:    shl.b32 %r8, %r7, 16;
; CHECK-NEXT:    ld.param.b8 %r9, [test_s_i8i32p_param_0+8];
; CHECK-NEXT:    shl.b32 %r10, %r9, 24;
; CHECK-NEXT:    or.b32 %r11, %r10, %r8;
; CHECK-NEXT:    or.b32 %r2, %r11, %r6;
; CHECK-NEXT:    ld.param.b64 %rd1, [test_s_i8i32p_param_0+16];
; CHECK-NEXT:    ld.param.b8 %rs1, [test_s_i8i32p_param_0+4];
; CHECK-NEXT:    ld.param.b32 %r1, [test_s_i8i32p_param_0];
; CHECK-NEXT:    shr.u32 %r12, %r2, 8;
; CHECK-NEXT:    shr.u32 %r13, %r11, 16;
; CHECK-NEXT:    { // callseq 1, 0
; CHECK-NEXT:    .param .align 8 .b8 param0[24];
; CHECK-NEXT:    st.param.b32 [param0], %r1;
; CHECK-NEXT:    st.param.b8 [param0+4], %rs1;
; CHECK-NEXT:    st.param.b8 [param0+5], %r2;
; CHECK-NEXT:    st.param.b8 [param0+6], %r12;
; CHECK-NEXT:    st.param.b8 [param0+7], %r13;
; CHECK-NEXT:    st.param.b8 [param0+8], %r9;
; CHECK-NEXT:    st.param.b64 [param0+16], %rd1;
; CHECK-NEXT:    .param .align 8 .b8 retval0[24];
; CHECK-NEXT:    call.uni (retval0), test_s_i8i32p, (param0);
; CHECK-NEXT:    ld.param.b32 %r14, [retval0];
; CHECK-NEXT:    ld.param.b8 %rs2, [retval0+4];
; CHECK-NEXT:    ld.param.b8 %rs3, [retval0+5];
; CHECK-NEXT:    ld.param.b8 %rs4, [retval0+6];
; CHECK-NEXT:    ld.param.b8 %rs5, [retval0+7];
; CHECK-NEXT:    ld.param.b8 %rs6, [retval0+8];
; CHECK-NEXT:    ld.param.b64 %rd2, [retval0+16];
; CHECK-NEXT:    } // callseq 1
; CHECK-NEXT:    cvt.u32.u16 %r15, %rs3;
; CHECK-NEXT:    cvt.u32.u16 %r16, %rs4;
; CHECK-NEXT:    cvt.u32.u16 %r17, %rs5;
; CHECK-NEXT:    cvt.u32.u16 %r18, %rs6;
; CHECK-NEXT:    st.param.b32 [func_retval0], %r14;
; CHECK-NEXT:    st.param.b8 [func_retval0+4], %rs2;
; CHECK-NEXT:    st.param.b8 [func_retval0+8], %r18;
; CHECK-NEXT:    st.param.b8 [func_retval0+7], %r17;
; CHECK-NEXT:    st.param.b8 [func_retval0+6], %r16;
; CHECK-NEXT:    st.param.b8 [func_retval0+5], %r15;
; CHECK-NEXT:    st.param.b64 [func_retval0+16], %rd2;
; CHECK-NEXT:    ret;
  %r = tail call %s_i8i32p @test_s_i8i32p(%s_i8i32p %a)
  ret %s_i8i32p %r
}


; i64 variant. The function passes its %s_i8i64p argument to a call to itself
; and returns the result, so the expected PTX covers incoming parameter loads,
; outgoing argument stores, loads of the callee's return value, and stores to
; this function's own return value.
; Expected layout (32 bytes): the i64 at offset 0 is accessed as a single b64,
; the i8 at offset 8 as a b8, and the second i64 (bytes 9-16) as eight b8
; accesses. On the incoming side the eight bytes are merged with shl/or as two
; 32-bit halves that are then combined; the individual bytes are extracted
; again with shr/bfe for the outgoing stores. The trailing i64 is at offset 24.
define %s_i8i64p @test_s_i8i64p(%s_i8i64p %a) {
; CHECK-LABEL: test_s_i8i64p(
; CHECK:       {
; CHECK-NEXT:    .reg .b16 %rs<20>;
; CHECK-NEXT:    .reg .b64 %rd<68>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.b8 %rd4, [test_s_i8i64p_param_0+10];
; CHECK-NEXT:    shl.b64 %rd5, %rd4, 8;
; CHECK-NEXT:    ld.param.b8 %rd6, [test_s_i8i64p_param_0+9];
; CHECK-NEXT:    or.b64 %rd7, %rd5, %rd6;
; CHECK-NEXT:    ld.param.b8 %rd8, [test_s_i8i64p_param_0+11];
; CHECK-NEXT:    shl.b64 %rd9, %rd8, 16;
; CHECK-NEXT:    ld.param.b8 %rd10, [test_s_i8i64p_param_0+12];
; CHECK-NEXT:    shl.b64 %rd11, %rd10, 24;
; CHECK-NEXT:    or.b64 %rd12, %rd11, %rd9;
; CHECK-NEXT:    or.b64 %rd13, %rd12, %rd7;
; CHECK-NEXT:    ld.param.b8 %rd14, [test_s_i8i64p_param_0+14];
; CHECK-NEXT:    shl.b64 %rd15, %rd14, 8;
; CHECK-NEXT:    ld.param.b8 %rd16, [test_s_i8i64p_param_0+13];
; CHECK-NEXT:    or.b64 %rd17, %rd15, %rd16;
; CHECK-NEXT:    ld.param.b8 %rd18, [test_s_i8i64p_param_0+15];
; CHECK-NEXT:    shl.b64 %rd19, %rd18, 16;
; CHECK-NEXT:    ld.param.b8 %rd20, [test_s_i8i64p_param_0+16];
; CHECK-NEXT:    shl.b64 %rd21, %rd20, 24;
; CHECK-NEXT:    or.b64 %rd22, %rd21, %rd19;
; CHECK-NEXT:    or.b64 %rd23, %rd22, %rd17;
; CHECK-NEXT:    shl.b64 %rd24, %rd23, 32;
; CHECK-NEXT:    or.b64 %rd2, %rd24, %rd13;
; CHECK-NEXT:    ld.param.b64 %rd3, [test_s_i8i64p_param_0+24];
; CHECK-NEXT:    ld.param.b8 %rs1, [test_s_i8i64p_param_0+8];
; CHECK-NEXT:    ld.param.b64 %rd1, [test_s_i8i64p_param_0];
; CHECK-NEXT:    shr.u64 %rd25, %rd2, 8;
; CHECK-NEXT:    shr.u64 %rd26, %rd2, 16;
; CHECK-NEXT:    shr.u64 %rd27, %rd2, 24;
; CHECK-NEXT:    bfe.u64 %rd28, %rd23, 8, 24;
; CHECK-NEXT:    bfe.u64 %rd29, %rd23, 16, 16;
; CHECK-NEXT:    bfe.u64 %rd30, %rd23, 24, 8;
; CHECK-NEXT:    { // callseq 2, 0
; CHECK-NEXT:    .param .align 8 .b8 param0[32];
; CHECK-NEXT:    st.param.b64 [param0], %rd1;
; CHECK-NEXT:    st.param.b8 [param0+8], %rs1;
; CHECK-NEXT:    st.param.b8 [param0+9], %rd2;
; CHECK-NEXT:    st.param.b8 [param0+10], %rd25;
; CHECK-NEXT:    st.param.b8 [param0+11], %rd26;
; CHECK-NEXT:    st.param.b8 [param0+12], %rd27;
; CHECK-NEXT:    st.param.b8 [param0+13], %rd23;
; CHECK-NEXT:    st.param.b8 [param0+14], %rd28;
; CHECK-NEXT:    st.param.b8 [param0+15], %rd29;
; CHECK-NEXT:    st.param.b8 [param0+16], %rd30;
; CHECK-NEXT:    st.param.b64 [param0+24], %rd3;
; CHECK-NEXT:    .param .align 8 .b8 retval0[32];
; CHECK-NEXT:    call.uni (retval0), test_s_i8i64p, (param0);
; CHECK-NEXT:    ld.param.b64 %rd31, [retval0];
; CHECK-NEXT:    ld.param.b8 %rs2, [retval0+8];
; CHECK-NEXT:    ld.param.b8 %rs3, [retval0+9];
; CHECK-NEXT:    ld.param.b8 %rs4, [retval0+10];
; CHECK-NEXT:    ld.param.b8 %rs5, [retval0+11];
; CHECK-NEXT:    ld.param.b8 %rs6, [retval0+12];
; CHECK-NEXT:    ld.param.b8 %rs7, [retval0+13];
; CHECK-NEXT:    ld.param.b8 %rs8, [retval0+14];
; CHECK-NEXT:    ld.param.b8 %rs9, [retval0+15];
; CHECK-NEXT:    ld.param.b8 %rs10, [retval0+16];
; CHECK-NEXT:    ld.param.b64 %rd32, [retval0+24];
; CHECK-NEXT:    } // callseq 2
; CHECK-NEXT:    cvt.u64.u16 %rd33, %rs3;
; CHECK-NEXT:    and.b64 %rd34, %rd33, 255;
; CHECK-NEXT:    cvt.u64.u16 %rd35, %rs4;
; CHECK-NEXT:    and.b64 %rd36, %rd35, 255;
; CHECK-NEXT:    shl.b64 %rd37, %rd36, 8;
; CHECK-NEXT:    or.b64 %rd38, %rd34, %rd37;
; CHECK-NEXT:    cvt.u64.u16 %rd39, %rs5;
; CHECK-NEXT:    and.b64 %rd40, %rd39, 255;
; CHECK-NEXT:    shl.b64 %rd41, %rd40, 16;
; CHECK-NEXT:    or.b64 %rd42, %rd38, %rd41;
; CHECK-NEXT:    cvt.u64.u16 %rd43, %rs6;
; CHECK-NEXT:    and.b64 %rd44, %rd43, 255;
; CHECK-NEXT:    shl.b64 %rd45, %rd44, 24;
; CHECK-NEXT:    or.b64 %rd46, %rd42, %rd45;
; CHECK-NEXT:    cvt.u64.u16 %rd47, %rs7;
; CHECK-NEXT:    and.b64 %rd48, %rd47, 255;
; CHECK-NEXT:    shl.b64 %rd49, %rd48, 32;
; CHECK-NEXT:    or.b64 %rd50, %rd46, %rd49;
; CHECK-NEXT:    cvt.u64.u16 %rd51, %rs8;
; CHECK-NEXT:    and.b64 %rd52, %rd51, 255;
; CHECK-NEXT:    shl.b64 %rd53, %rd52, 40;
; CHECK-NEXT:    or.b64 %rd54, %rd50, %rd53;
; CHECK-NEXT:    cvt.u64.u16 %rd55, %rs9;
; CHECK-NEXT:    and.b64 %rd56, %rd55, 255;
; CHECK-NEXT:    shl.b64 %rd57, %rd56, 48;
; CHECK-NEXT:    or.b64 %rd58, %rd54, %rd57;
; CHECK-NEXT:    cvt.u64.u16 %rd59, %rs10;
; CHECK-NEXT:    shl.b64 %rd60, %rd59, 56;
; CHECK-NEXT:    or.b64 %rd61, %rd58, %rd60;
; CHECK-NEXT:    st.param.b64 [func_retval0], %rd31;
; CHECK-NEXT:    st.param.b8 [func_retval0+8], %rs2;
; CHECK-NEXT:    st.param.b8 [func_retval0+12], %rd43;
; CHECK-NEXT:    st.param.b8 [func_retval0+11], %rd39;
; CHECK-NEXT:    st.param.b8 [func_retval0+10], %rd35;
; CHECK-NEXT:    st.param.b8 [func_retval0+9], %rd33;
; CHECK-NEXT:    shr.u64 %rd64, %rd50, 32;
; CHECK-NEXT:    st.param.b8 [func_retval0+13], %rd64;
; CHECK-NEXT:    shr.u64 %rd65, %rd54, 40;
; CHECK-NEXT:    st.param.b8 [func_retval0+14], %rd65;
; CHECK-NEXT:    shr.u64 %rd66, %rd58, 48;
; CHECK-NEXT:    st.param.b8 [func_retval0+15], %rd66;
; CHECK-NEXT:    shr.u64 %rd67, %rd61, 56;
; CHECK-NEXT:    st.param.b8 [func_retval0+16], %rd67;
; CHECK-NEXT:    st.param.b64 [func_retval0+24], %rd32;
; CHECK-NEXT:    ret;
  %r = tail call %s_i8i64p @test_s_i8i64p(%s_i8i64p %a)
  ret %s_i8i64p %r
}


; half variant. The function passes its %s_i8f16p argument to a call to itself
; and returns the result, so the expected PTX covers incoming parameter loads,
; outgoing argument stores, loads of the callee's return value, and stores to
; this function's own return value.
; Expected layout (16 bytes): the half at offset 0 is accessed as a single b16
; held in a .b16 register, the i8 at offset 2 as a b8, and the second half
; (bytes 3-4) one b8 at a time (reassembled with shl/or on the incoming side).
; The trailing i64 is at offset 8.
define %s_i8f16p @test_s_i8f16p(%s_i8f16p %a) {
; CHECK-LABEL: test_s_i8f16p(
; CHECK:       {
; CHECK-NEXT:    .reg .b16 %rs<15>;
; CHECK-NEXT:    .reg .b64 %rd<4>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.b8 %rs4, [test_s_i8f16p_param_0+4];
; CHECK-NEXT:    shl.b16 %rs5, %rs4, 8;
; CHECK-NEXT:    ld.param.b8 %rs6, [test_s_i8f16p_param_0+3];
; CHECK-NEXT:    or.b16 %rs3, %rs5, %rs6;
; CHECK-NEXT:    ld.param.b64 %rd1, [test_s_i8f16p_param_0+8];
; CHECK-NEXT:    ld.param.b8 %rs2, [test_s_i8f16p_param_0+2];
; CHECK-NEXT:    ld.param.b16 %rs1, [test_s_i8f16p_param_0];
; CHECK-NEXT:    { // callseq 3, 0
; CHECK-NEXT:    .param .align 8 .b8 param0[16];
; CHECK-NEXT:    st.param.b16 [param0], %rs1;
; CHECK-NEXT:    st.param.b8 [param0+2], %rs2;
; CHECK-NEXT:    st.param.b8 [param0+3], %rs3;
; CHECK-NEXT:    st.param.b8 [param0+4], %rs4;
; CHECK-NEXT:    st.param.b64 [param0+8], %rd1;
; CHECK-NEXT:    .param .align 8 .b8 retval0[16];
; CHECK-NEXT:    call.uni (retval0), test_s_i8f16p, (param0);
; CHECK-NEXT:    ld.param.b16 %rs7, [retval0];
; CHECK-NEXT:    ld.param.b8 %rs8, [retval0+2];
; CHECK-NEXT:    ld.param.b8 %rs9, [retval0+3];
; CHECK-NEXT:    ld.param.b8 %rs10, [retval0+4];
; CHECK-NEXT:    ld.param.b64 %rd2, [retval0+8];
; CHECK-NEXT:    } // callseq 3
; CHECK-NEXT:    st.param.b16 [func_retval0], %rs7;
; CHECK-NEXT:    st.param.b8 [func_retval0+2], %rs8;
; CHECK-NEXT:    st.param.b8 [func_retval0+4], %rs10;
; CHECK-NEXT:    st.param.b8 [func_retval0+3], %rs9;
; CHECK-NEXT:    st.param.b64 [func_retval0+8], %rd2;
; CHECK-NEXT:    ret;
  %r = tail call %s_i8f16p @test_s_i8f16p(%s_i8f16p %a)
  ret %s_i8f16p %r
}


; <2 x half> variant. The function passes its %s_i8f16x2p argument to a call to
; itself and returns the result, so the expected PTX covers incoming parameter
; loads, outgoing argument stores, loads of the callee's return value, and
; stores to this function's own return value.
; Expected layout (24 bytes): the <2 x half> at offset 0 is accessed as a
; single b32, the i8 at offset 4 as a b8, and the second <2 x half>
; (bytes 5-8) as four b8 accesses (reassembled with shl/or on the incoming
; side and split again with shr before the outgoing stores). The trailing i64
; is at offset 16.
define %s_i8f16x2p @test_s_i8f16x2p(%s_i8f16x2p %a) {
; CHECK-LABEL: test_s_i8f16x2p(
; CHECK:       {
; CHECK-NEXT:    .reg .b16 %rs<12>;
; CHECK-NEXT:    .reg .b32 %r<20>;
; CHECK-NEXT:    .reg .b64 %rd<4>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.b8 %r3, [test_s_i8f16x2p_param_0+6];
; CHECK-NEXT:    shl.b32 %r4, %r3, 8;
; CHECK-NEXT:    ld.param.b8 %r5, [test_s_i8f16x2p_param_0+5];
; CHECK-NEXT:    or.b32 %r6, %r4, %r5;
; CHECK-NEXT:    ld.param.b8 %r7, [test_s_i8f16x2p_param_0+7];
; CHECK-NEXT:    shl.b32 %r8, %r7, 16;
; CHECK-NEXT:    ld.param.b8 %r9, [test_s_i8f16x2p_param_0+8];
; CHECK-NEXT:    shl.b32 %r10, %r9, 24;
; CHECK-NEXT:    or.b32 %r11, %r10, %r8;
; CHECK-NEXT:    or.b32 %r2, %r11, %r6;
; CHECK-NEXT:    ld.param.b64 %rd1, [test_s_i8f16x2p_param_0+16];
; CHECK-NEXT:    ld.param.b8 %rs1, [test_s_i8f16x2p_param_0+4];
; CHECK-NEXT:    ld.param.b32 %r1, [test_s_i8f16x2p_param_0];
; CHECK-NEXT:    shr.u32 %r12, %r2, 8;
; CHECK-NEXT:    shr.u32 %r13, %r11, 16;
; CHECK-NEXT:    { // callseq 4, 0
; CHECK-NEXT:    .param .align 8 .b8 param0[24];
; CHECK-NEXT:    st.param.b32 [param0], %r1;
; CHECK-NEXT:    st.param.b8 [param0+4], %rs1;
; CHECK-NEXT:    st.param.b8 [param0+5], %r2;
; CHECK-NEXT:    st.param.b8 [param0+6], %r12;
; CHECK-NEXT:    st.param.b8 [param0+7], %r13;
; CHECK-NEXT:    st.param.b8 [param0+8], %r9;
; CHECK-NEXT:    st.param.b64 [param0+16], %rd1;
; CHECK-NEXT:    .param .align 8 .b8 retval0[24];
; CHECK-NEXT:    call.uni (retval0), test_s_i8f16x2p, (param0);
; CHECK-NEXT:    ld.param.b32 %r14, [retval0];
; CHECK-NEXT:    ld.param.b8 %rs2, [retval0+4];
; CHECK-NEXT:    ld.param.b8 %rs3, [retval0+5];
; CHECK-NEXT:    ld.param.b8 %rs4, [retval0+6];
; CHECK-NEXT:    ld.param.b8 %rs5, [retval0+7];
; CHECK-NEXT:    ld.param.b8 %rs6, [retval0+8];
; CHECK-NEXT:    ld.param.b64 %rd2, [retval0+16];
; CHECK-NEXT:    } // callseq 4
; CHECK-NEXT:    cvt.u32.u16 %r15, %rs3;
; CHECK-NEXT:    cvt.u32.u16 %r16, %rs4;
; CHECK-NEXT:    cvt.u32.u16 %r17, %rs5;
; CHECK-NEXT:    cvt.u32.u16 %r18, %rs6;
; CHECK-NEXT:    st.param.b32 [func_retval0], %r14;
; CHECK-NEXT:    st.param.b8 [func_retval0+4], %rs2;
; CHECK-NEXT:    st.param.b8 [func_retval0+8], %r18;
; CHECK-NEXT:    st.param.b8 [func_retval0+7], %r17;
; CHECK-NEXT:    st.param.b8 [func_retval0+6], %r16;
; CHECK-NEXT:    st.param.b8 [func_retval0+5], %r15;
; CHECK-NEXT:    st.param.b64 [func_retval0+16], %rd2;
; CHECK-NEXT:    ret;
  %r = tail call %s_i8f16x2p @test_s_i8f16x2p(%s_i8f16x2p %a)
  ret %s_i8f16x2p %r
}


; float variant. The function passes its %s_i8f32p argument to a call to itself
; and returns the result, so the expected PTX covers incoming parameter loads,
; outgoing argument stores, loads of the callee's return value, and stores to
; this function's own return value.
; Expected layout (24 bytes): the float at offset 0 is accessed as a single
; b32 held in a .b32 register, the i8 at offset 4 as a b8, and the second
; float (bytes 5-8) as four b8 accesses (reassembled with shl/or on the
; incoming side and split again with shr before the outgoing stores). The
; trailing i64 is at offset 16.
define %s_i8f32p @test_s_i8f32p(%s_i8f32p %a) {
; CHECK-LABEL: test_s_i8f32p(
; CHECK:       {
; CHECK-NEXT:    .reg .b16 %rs<12>;
; CHECK-NEXT:    .reg .b32 %r<20>;
; CHECK-NEXT:    .reg .b64 %rd<4>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.b8 %r3, [test_s_i8f32p_param_0+6];
; CHECK-NEXT:    shl.b32 %r4, %r3, 8;
; CHECK-NEXT:    ld.param.b8 %r5, [test_s_i8f32p_param_0+5];
; CHECK-NEXT:    or.b32 %r6, %r4, %r5;
; CHECK-NEXT:    ld.param.b8 %r7, [test_s_i8f32p_param_0+7];
; CHECK-NEXT:    shl.b32 %r8, %r7, 16;
; CHECK-NEXT:    ld.param.b8 %r9, [test_s_i8f32p_param_0+8];
; CHECK-NEXT:    shl.b32 %r10, %r9, 24;
; CHECK-NEXT:    or.b32 %r11, %r10, %r8;
; CHECK-NEXT:    or.b32 %r2, %r11, %r6;
; CHECK-NEXT:    ld.param.b64 %rd1, [test_s_i8f32p_param_0+16];
; CHECK-NEXT:    ld.param.b8 %rs1, [test_s_i8f32p_param_0+4];
; CHECK-NEXT:    ld.param.b32 %r1, [test_s_i8f32p_param_0];
; CHECK-NEXT:    shr.u32 %r12, %r2, 8;
; CHECK-NEXT:    shr.u32 %r13, %r11, 16;
; CHECK-NEXT:    { // callseq 5, 0
; CHECK-NEXT:    .param .align 8 .b8 param0[24];
; CHECK-NEXT:    st.param.b32 [param0], %r1;
; CHECK-NEXT:    st.param.b8 [param0+4], %rs1;
; CHECK-NEXT:    st.param.b8 [param0+5], %r2;
; CHECK-NEXT:    st.param.b8 [param0+6], %r12;
; CHECK-NEXT:    st.param.b8 [param0+7], %r13;
; CHECK-NEXT:    st.param.b8 [param0+8], %r9;
; CHECK-NEXT:    st.param.b64 [param0+16], %rd1;
; CHECK-NEXT:    .param .align 8 .b8 retval0[24];
; CHECK-NEXT:    call.uni (retval0), test_s_i8f32p, (param0);
; CHECK-NEXT:    ld.param.b32 %r14, [retval0];
; CHECK-NEXT:    ld.param.b8 %rs2, [retval0+4];
; CHECK-NEXT:    ld.param.b8 %rs3, [retval0+5];
; CHECK-NEXT:    ld.param.b8 %rs4, [retval0+6];
; CHECK-NEXT:    ld.param.b8 %rs5, [retval0+7];
; CHECK-NEXT:    ld.param.b8 %rs6, [retval0+8];
; CHECK-NEXT:    ld.param.b64 %rd2, [retval0+16];
; CHECK-NEXT:    } // callseq 5
; CHECK-NEXT:    cvt.u32.u16 %r15, %rs3;
; CHECK-NEXT:    cvt.u32.u16 %r16, %rs4;
; CHECK-NEXT:    cvt.u32.u16 %r17, %rs5;
; CHECK-NEXT:    cvt.u32.u16 %r18, %rs6;
; CHECK-NEXT:    st.param.b32 [func_retval0], %r14;
; CHECK-NEXT:    st.param.b8 [func_retval0+4], %rs2;
; CHECK-NEXT:    st.param.b8 [func_retval0+8], %r18;
; CHECK-NEXT:    st.param.b8 [func_retval0+7], %r17;
; CHECK-NEXT:    st.param.b8 [func_retval0+6], %r16;
; CHECK-NEXT:    st.param.b8 [func_retval0+5], %r15;
; CHECK-NEXT:    st.param.b64 [func_retval0+16], %rd2;
; CHECK-NEXT:    ret;
  %r = tail call %s_i8f32p @test_s_i8f32p(%s_i8f32p %a)
  ret %s_i8f32p %r
}


; double variant. The function passes its %s_i8f64p argument to a call to
; itself and returns the result, so the expected PTX covers incoming parameter
; loads, outgoing argument stores, loads of the callee's return value, and
; stores to this function's own return value.
; Expected layout (32 bytes): the double at offset 0 is accessed as a single
; b64 held in a .b64 register, the i8 at offset 8 as a b8, and the second
; double (bytes 9-16) as eight b8 accesses. On the incoming side the eight
; bytes are merged with shl/or as two 32-bit halves that are then combined;
; the individual bytes are extracted again with shr/bfe for the outgoing
; stores. The trailing i64 is at offset 24.
define %s_i8f64p @test_s_i8f64p(%s_i8f64p %a) {
; CHECK-LABEL: test_s_i8f64p(
; CHECK:       {
; CHECK-NEXT:    .reg .b16 %rs<20>;
; CHECK-NEXT:    .reg .b64 %rd<68>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.b8 %rd4, [test_s_i8f64p_param_0+10];
; CHECK-NEXT:    shl.b64 %rd5, %rd4, 8;
; CHECK-NEXT:    ld.param.b8 %rd6, [test_s_i8f64p_param_0+9];
; CHECK-NEXT:    or.b64 %rd7, %rd5, %rd6;
; CHECK-NEXT:    ld.param.b8 %rd8, [test_s_i8f64p_param_0+11];
; CHECK-NEXT:    shl.b64 %rd9, %rd8, 16;
; CHECK-NEXT:    ld.param.b8 %rd10, [test_s_i8f64p_param_0+12];
; CHECK-NEXT:    shl.b64 %rd11, %rd10, 24;
; CHECK-NEXT:    or.b64 %rd12, %rd11, %rd9;
; CHECK-NEXT:    or.b64 %rd13, %rd12, %rd7;
; CHECK-NEXT:    ld.param.b8 %rd14, [test_s_i8f64p_param_0+14];
; CHECK-NEXT:    shl.b64 %rd15, %rd14, 8;
; CHECK-NEXT:    ld.param.b8 %rd16, [test_s_i8f64p_param_0+13];
; CHECK-NEXT:    or.b64 %rd17, %rd15, %rd16;
; CHECK-NEXT:    ld.param.b8 %rd18, [test_s_i8f64p_param_0+15];
; CHECK-NEXT:    shl.b64 %rd19, %rd18, 16;
; CHECK-NEXT:    ld.param.b8 %rd20, [test_s_i8f64p_param_0+16];
; CHECK-NEXT:    shl.b64 %rd21, %rd20, 24;
; CHECK-NEXT:    or.b64 %rd22, %rd21, %rd19;
; CHECK-NEXT:    or.b64 %rd23, %rd22, %rd17;
; CHECK-NEXT:    shl.b64 %rd24, %rd23, 32;
; CHECK-NEXT:    or.b64 %rd2, %rd24, %rd13;
; CHECK-NEXT:    ld.param.b64 %rd3, [test_s_i8f64p_param_0+24];
; CHECK-NEXT:    ld.param.b8 %rs1, [test_s_i8f64p_param_0+8];
; CHECK-NEXT:    ld.param.b64 %rd1, [test_s_i8f64p_param_0];
; CHECK-NEXT:    shr.u64 %rd25, %rd2, 8;
; CHECK-NEXT:    shr.u64 %rd26, %rd2, 16;
; CHECK-NEXT:    shr.u64 %rd27, %rd2, 24;
; CHECK-NEXT:    bfe.u64 %rd28, %rd23, 8, 24;
; CHECK-NEXT:    bfe.u64 %rd29, %rd23, 16, 16;
; CHECK-NEXT:    bfe.u64 %rd30, %rd23, 24, 8;
; CHECK-NEXT:    { // callseq 6, 0
; CHECK-NEXT:    .param .align 8 .b8 param0[32];
; CHECK-NEXT:    st.param.b64 [param0], %rd1;
; CHECK-NEXT:    st.param.b8 [param0+8], %rs1;
; CHECK-NEXT:    st.param.b8 [param0+9], %rd2;
; CHECK-NEXT:    st.param.b8 [param0+10], %rd25;
; CHECK-NEXT:    st.param.b8 [param0+11], %rd26;
; CHECK-NEXT:    st.param.b8 [param0+12], %rd27;
; CHECK-NEXT:    st.param.b8 [param0+13], %rd23;
; CHECK-NEXT:    st.param.b8 [param0+14], %rd28;
; CHECK-NEXT:    st.param.b8 [param0+15], %rd29;
; CHECK-NEXT:    st.param.b8 [param0+16], %rd30;
; CHECK-NEXT:    st.param.b64 [param0+24], %rd3;
; CHECK-NEXT:    .param .align 8 .b8 retval0[32];
; CHECK-NEXT:    call.uni (retval0), test_s_i8f64p, (param0);
; CHECK-NEXT:    ld.param.b64 %rd31, [retval0];
; CHECK-NEXT:    ld.param.b8 %rs2, [retval0+8];
; CHECK-NEXT:    ld.param.b8 %rs3, [retval0+9];
; CHECK-NEXT:    ld.param.b8 %rs4, [retval0+10];
; CHECK-NEXT:    ld.param.b8 %rs5, [retval0+11];
; CHECK-NEXT:    ld.param.b8 %rs6, [retval0+12];
; CHECK-NEXT:    ld.param.b8 %rs7, [retval0+13];
; CHECK-NEXT:    ld.param.b8 %rs8, [retval0+14];
; CHECK-NEXT:    ld.param.b8 %rs9, [retval0+15];
; CHECK-NEXT:    ld.param.b8 %rs10, [retval0+16];
; CHECK-NEXT:    ld.param.b64 %rd32, [retval0+24];
; CHECK-NEXT:    } // callseq 6
; CHECK-NEXT:    cvt.u64.u16 %rd33, %rs3;
; CHECK-NEXT:    and.b64 %rd34, %rd33, 255;
; CHECK-NEXT:    cvt.u64.u16 %rd35, %rs4;
; CHECK-NEXT:    and.b64 %rd36, %rd35, 255;
; CHECK-NEXT:    shl.b64 %rd37, %rd36, 8;
; CHECK-NEXT:    or.b64 %rd38, %rd34, %rd37;
; CHECK-NEXT:    cvt.u64.u16 %rd39, %rs5;
; CHECK-NEXT:    and.b64 %rd40, %rd39, 255;
; CHECK-NEXT:    shl.b64 %rd41, %rd40, 16;
; CHECK-NEXT:    or.b64 %rd42, %rd38, %rd41;
; CHECK-NEXT:    cvt.u64.u16 %rd43, %rs6;
; CHECK-NEXT:    and.b64 %rd44, %rd43, 255;
; CHECK-NEXT:    shl.b64 %rd45, %rd44, 24;
; CHECK-NEXT:    or.b64 %rd46, %rd42, %rd45;
; CHECK-NEXT:    cvt.u64.u16 %rd47, %rs7;
; CHECK-NEXT:    and.b64 %rd48, %rd47, 255;
; CHECK-NEXT:    shl.b64 %rd49, %rd48, 32;
; CHECK-NEXT:    or.b64 %rd50, %rd46, %rd49;
; CHECK-NEXT:    cvt.u64.u16 %rd51, %rs8;
; CHECK-NEXT:    and.b64 %rd52, %rd51, 255;
; CHECK-NEXT:    shl.b64 %rd53, %rd52, 40;
; CHECK-NEXT:    or.b64 %rd54, %rd50, %rd53;
; CHECK-NEXT:    cvt.u64.u16 %rd55, %rs9;
; CHECK-NEXT:    and.b64 %rd56, %rd55, 255;
; CHECK-NEXT:    shl.b64 %rd57, %rd56, 48;
; CHECK-NEXT:    or.b64 %rd58, %rd54, %rd57;
; CHECK-NEXT:    cvt.u64.u16 %rd59, %rs10;
; CHECK-NEXT:    shl.b64 %rd60, %rd59, 56;
; CHECK-NEXT:    or.b64 %rd61, %rd58, %rd60;
; CHECK-NEXT:    st.param.b64 [func_retval0], %rd31;
; CHECK-NEXT:    st.param.b8 [func_retval0+8], %rs2;
; CHECK-NEXT:    st.param.b8 [func_retval0+12], %rd43;
; CHECK-NEXT:    st.param.b8 [func_retval0+11], %rd39;
; CHECK-NEXT:    st.param.b8 [func_retval0+10], %rd35;
; CHECK-NEXT:    st.param.b8 [func_retval0+9], %rd33;
; CHECK-NEXT:    shr.u64 %rd64, %rd50, 32;
; CHECK-NEXT:    st.param.b8 [func_retval0+13], %rd64;
; CHECK-NEXT:    shr.u64 %rd65, %rd54, 40;
; CHECK-NEXT:    st.param.b8 [func_retval0+14], %rd65;
; CHECK-NEXT:    shr.u64 %rd66, %rd58, 48;
; CHECK-NEXT:    st.param.b8 [func_retval0+15], %rd66;
; CHECK-NEXT:    shr.u64 %rd67, %rd61, 56;
; CHECK-NEXT:    st.param.b8 [func_retval0+16], %rd67;
; CHECK-NEXT:    st.param.b64 [func_retval0+24], %rd32;
; CHECK-NEXT:    ret;
  %r = tail call %s_i8f64p @test_s_i8f64p(%s_i8f64p %a)
  ret %s_i8f64p %r
}