1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70
|
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mattr=+sve -force-streaming-compatible-sve < %s | FileCheck %s
target triple = "aarch64-unknown-linux-gnu"
; Splat lane 0 of %b across eight i32 lanes, permute that splat with the
; two-way interleave mask <0,4,1,5,2,6,3,7>, and store the result to %a.
; Every lane of the splat holds the same value, so the permutation cannot
; change the data that ends up in memory.
; The assertions below expect one broadcast of s0, one register copy, and a
; single st2w of the resulting register pair under a vl4 predicate.
; NOTE(review): going by the function name, this guards against a hang while
; merging stores after legalisation -- confirm against the originating change.
define void @hang_when_merging_stores_after_legalisation(ptr %a, <2 x i32> %b) {
; CHECK-LABEL: hang_when_merging_stores_after_legalisation:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: ptrue p0.s, vl4
; CHECK-NEXT: mov z0.s, s0
; CHECK-NEXT: mov z1.d, z0.d
; CHECK-NEXT: st2w { z0.s, z1.s }, p0, [x0]
; CHECK-NEXT: ret
; An all-zero mask selects lane 0 of %b for each of the eight result lanes.
%splat = shufflevector <2 x i32> %b, <2 x i32> undef, <8 x i32> zeroinitializer
; The mask only indexes 0-7, so the undef second operand is never read.
%interleaved.vec = shufflevector <8 x i32> %splat, <8 x i32> undef, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
store <8 x i32> %interleaved.vec, ptr %a, align 4
ret void
}
; Interleave two distinct <4 x i32> inputs and store the result to %a:
; %v1 and %v2 are first concatenated into one <8 x i32>, then permuted so
; memory receives v1[0], v2[0], v1[1], v2[1], ... The store is only
; byte-aligned (align 1).
; The assertions below expect a single st2w of { z0, z1 } under a vl4
; predicate, with no data-moving instructions ahead of it (only register
; "kill" annotations).
define void @interleave_store_without_splat(ptr %a, <4 x i32> %v1, <4 x i32> %v2) {
; CHECK-LABEL: interleave_store_without_splat:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $q1 killed $q1 killed $z0_z1 def $z0_z1
; CHECK-NEXT: ptrue p0.s, vl4
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0_z1 def $z0_z1
; CHECK-NEXT: st2w { z0.s, z1.s }, p0, [x0]
; CHECK-NEXT: ret
; Identity mask over both operands: lanes 0-3 come from %v1, lanes 4-7 from %v2.
%shuffle = shufflevector <4 x i32> %v1, <4 x i32> %v2, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; Alternate between the low half (%v1 lanes) and the high half (%v2 lanes).
%interleaved = shufflevector <8 x i32> %shuffle, <8 x i32> undef, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
store <8 x i32> %interleaved, ptr %a, align 1
ret void
}
; Interleave two <8 x i32> inputs into one <16 x i32> and store it to %a.
; The assertions below expect the store to be split into two st2w
; instructions, each under a vl4 predicate:
;   - { z4, z5 } (copies of z0 and z2) stored at [x0];
;   - { z2, z3 } (z2 is a copy of z1) stored at [x0, x8, lsl #2] with x8 = 8,
;     i.e. 32 bytes past %a, directly after the eight i32 values written by
;     the first store.
; NOTE(review): presumably z0/z1 carry the two halves of %v1 and z2/z3 the two
; halves of %v2 on entry -- verify against the calling convention.
define void @interleave_store_legalization(ptr %a, <8 x i32> %v1, <8 x i32> %v2) {
; CHECK-LABEL: interleave_store_legalization:
; CHECK: // %bb.0:
; CHECK-NEXT: mov x8, #8 // =0x8
; CHECK-NEXT: // kill: def $q3 killed $q3 killed $z2_z3 def $z2_z3
; CHECK-NEXT: mov z5.d, z2.d
; CHECK-NEXT: mov z2.d, z1.d
; CHECK-NEXT: mov z4.d, z0.d
; CHECK-NEXT: ptrue p0.s, vl4
; CHECK-NEXT: st2w { z4.s, z5.s }, p0, [x0]
; CHECK-NEXT: st2w { z2.s, z3.s }, p0, [x0, x8, lsl #2]
; CHECK-NEXT: ret
; Mask indices 0-7 pick lanes of %v1 and 8-15 pick lanes of %v2, alternating.
%interleaved.vec = shufflevector <8 x i32> %v1, <8 x i32> %v2, <16 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11,
i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
store <16 x i32> %interleaved.vec, ptr %a, align 4
ret void
}
; Ensure we don't crash when trying to lower a shuffle via an extract
; The select mask is a splat of lane 0 of an all-zero <32 x i1> vector, so it
; is false in every lane and the select always yields the loaded value %1.
; The vector.body block therefore stores back exactly what it loaded from
; %dst, and the assertions below expect the whole function to reduce to a
; bare ret.
define void @crash_when_lowering_extract_shuffle(ptr %dst, i1 %cond) {
; CHECK-LABEL: crash_when_lowering_extract_shuffle:
; CHECK: // %bb.0:
; CHECK-NEXT: ret
; Both shuffle operands are zeroinitializer, so every result lane is false.
%broadcast.splat = shufflevector <32 x i1> zeroinitializer, <32 x i1> zeroinitializer, <32 x i32> zeroinitializer
br i1 %cond, label %exit, label %vector.body
vector.body:
%1 = load <32 x i32>, ptr %dst, align 16
; With an all-false condition, the select picks %1 in every lane.
%predphi = select <32 x i1> %broadcast.splat, <32 x i32> zeroinitializer, <32 x i32> %1
store <32 x i32> %predphi, ptr %dst, align 16
br label %exit
exit:
ret void
}
|