1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120
|
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
; RUN: llc %s --mattr=+complxnum -o - | FileCheck %s
target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128-ni:1-p2:32:8:8:32-ni:2"
target triple = "aarch64-none-linux-gnu"
; Check that the deinterleaving pass doesn't generate broken IR.
define void @check_deinterleave_crash() #0 {
; CHECK-LABEL: check_deinterleave_crash:
; CHECK: // %bb.0: // %bb
; CHECK-NEXT: mov x8, xzr
; CHECK-NEXT: str wzr, [x8]
; Reduced reproducer: a self-looping block carrying two <2 x i32> accumulators
; that are combined after the loop. The assertions above only pin the volatile
; store of zero to a null pointer in the emitted code.
; NOTE(review): attribute group #0 is not defined in the visible part of the
; file - presumably it is declared further down; confirm.
bb:
br label %bb173
; Loop block: branches back to itself or on to %bb193.
bb173: ; preds = %bb173, %bb
; Both accumulators start as zero vectors and are fed back from the adds below.
%phi177 = phi <2 x i32> [ %add190, %bb173 ], [ zeroinitializer, %bb ]
%phi178 = phi <2 x i32> [ %add187, %bb173 ], [ zeroinitializer, %bb ]
%add185 = add <2 x i32> %phi178, <i32 1, i32 1>
%add186 = add <2 x i32> %phi177, <i32 1, i32 1>
; The shuffles take zero vectors as both inputs and use an all-zero mask
; (every result lane selects lane 0 of the first operand).
%shufflevector = shufflevector <2 x i32> zeroinitializer, <2 x i32> zeroinitializer, <2 x i32> zeroinitializer
%add187 = add <2 x i32> %add185, %shufflevector
%shufflevector189 = shufflevector <2 x i32> zeroinitializer, <2 x i32> zeroinitializer, <2 x i32> zeroinitializer
%add190 = add <2 x i32> %add186, %shufflevector189
; The branch condition is poison, so the test does not depend on a real trip count.
br i1 poison, label %bb193, label %bb173
bb193: ; preds = %bb173
; Combines the two loop-carried values; the result has no users in this function.
%add194 = or <2 x i32> %add190, %add187
; Volatile store of zero through a null pointer, followed by unreachable.
store volatile i32 0, ptr null, align 4
unreachable
}
; Check that the deinterleaving pass doesn't try to transform isolated patterns without a relevant deinterleaving pattern
define i32 @check_deinterleaving_has_deinterleave(ptr %a) {
; CHECK-LABEL: check_deinterleaving_has_deinterleave:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: movi v0.2d, #0000000000000000
; CHECK-NEXT: movi v1.4s, #1
; CHECK-NEXT: add x8, x0, #16
; CHECK-NEXT: movi v3.2d, #0000000000000000
; CHECK-NEXT: movi v2.2d, #0000000000000000
; CHECK-NEXT: mov w9, #32 // =0x20
; CHECK-NEXT: movi v4.2d, #0000000000000000
; CHECK-NEXT: movi v5.2d, #0000000000000000
; CHECK-NEXT: movi v7.2d, #0000000000000000
; CHECK-NEXT: movi v6.2d, #0000000000000000
; CHECK-NEXT: movi v16.2d, #0000000000000000
; CHECK-NEXT: .LBB1_1: // %vector.body
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: ldp q17, q18, [x8, #-16]
; CHECK-NEXT: subs x9, x9, #32
; CHECK-NEXT: add x8, x8, #32
; CHECK-NEXT: cmeq v17.16b, v17.16b, #0
; CHECK-NEXT: cmeq v18.16b, v18.16b, #0
; CHECK-NEXT: ushll2 v19.8h, v17.16b, #0
; CHECK-NEXT: ushll v17.8h, v17.8b, #0
; CHECK-NEXT: ushll2 v20.8h, v18.16b, #0
; CHECK-NEXT: ushll v18.8h, v18.8b, #0
; CHECK-NEXT: ushll v21.4s, v19.4h, #0
; CHECK-NEXT: ushll2 v19.4s, v19.8h, #0
; CHECK-NEXT: ushll v22.4s, v17.4h, #0
; CHECK-NEXT: ushll2 v17.4s, v17.8h, #0
; CHECK-NEXT: ushll2 v23.4s, v20.8h, #0
; CHECK-NEXT: ushll v24.4s, v18.4h, #0
; CHECK-NEXT: ushll2 v18.4s, v18.8h, #0
; CHECK-NEXT: ushll v20.4s, v20.4h, #0
; CHECK-NEXT: and v21.16b, v21.16b, v1.16b
; CHECK-NEXT: and v19.16b, v19.16b, v1.16b
; CHECK-NEXT: and v22.16b, v22.16b, v1.16b
; CHECK-NEXT: and v17.16b, v17.16b, v1.16b
; CHECK-NEXT: and v23.16b, v23.16b, v1.16b
; CHECK-NEXT: and v24.16b, v24.16b, v1.16b
; CHECK-NEXT: and v18.16b, v18.16b, v1.16b
; CHECK-NEXT: and v20.16b, v20.16b, v1.16b
; CHECK-NEXT: add v4.4s, v4.4s, v19.4s
; CHECK-NEXT: add v2.4s, v2.4s, v21.4s
; CHECK-NEXT: add v0.4s, v0.4s, v22.4s
; CHECK-NEXT: add v3.4s, v3.4s, v17.4s
; CHECK-NEXT: add v16.4s, v16.4s, v23.4s
; CHECK-NEXT: add v5.4s, v5.4s, v24.4s
; CHECK-NEXT: add v6.4s, v6.4s, v20.4s
; CHECK-NEXT: add v7.4s, v7.4s, v18.4s
; CHECK-NEXT: b.ne .LBB1_1
; CHECK-NEXT: // %bb.2: // %middle.block
; CHECK-NEXT: add v1.4s, v7.4s, v3.4s
; CHECK-NEXT: add v3.4s, v16.4s, v4.4s
; CHECK-NEXT: add v0.4s, v5.4s, v0.4s
; CHECK-NEXT: add v2.4s, v6.4s, v2.4s
; CHECK-NEXT: add v1.4s, v1.4s, v3.4s
; CHECK-NEXT: add v0.4s, v0.4s, v2.4s
; CHECK-NEXT: add v0.4s, v0.4s, v1.4s
; CHECK-NEXT: addv s0, v0.4s
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
; Counts zero bytes in memory starting at %a: each iteration loads two 16-byte
; vectors, compares every byte against zero, widens the i1 results to i32 and
; accumulates them in two <16 x i32> partial sums that are reduced after the loop.
entry:
br label %vector.body
vector.body: ; preds = %vector.body, %entry
; %index is the byte offset into %a; the two vector phis are the partial sums.
%index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
%vec.phi = phi <16 x i32> [ zeroinitializer, %entry ], [ %9, %vector.body ]
%vec.phi50 = phi <16 x i32> [ zeroinitializer, %entry ], [ %10, %vector.body ]
; Addresses of the two adjacent 16-byte chunks (offsets %index and %index + 16).
%next.gep = getelementptr i8, ptr %a, i64 %index
%4 = getelementptr i8, ptr %next.gep, i64 16
%wide.load = load <16 x i8>, ptr %next.gep, align 1
%wide.load51 = load <16 x i8>, ptr %4, align 1
; Per-lane "byte == 0" masks, zero-extended so each matching lane contributes 1.
%5 = icmp eq <16 x i8> %wide.load, zeroinitializer
%6 = icmp eq <16 x i8> %wide.load51, zeroinitializer
%7 = zext <16 x i1> %5 to <16 x i32>
%8 = zext <16 x i1> %6 to <16 x i32>
%9 = add <16 x i32> %vec.phi, %7
%10 = add <16 x i32> %vec.phi50, %8
; The offset advances by 32 and the loop exits once it equals 32, i.e. the exit
; condition already holds after the first pass through this block.
%index.next = add nuw i64 %index, 32
%11 = icmp eq i64 %index.next, 32
br i1 %11, label %middle.block, label %vector.body
middle.block:
; Merge the two partial sums, then reduce across lanes to a single i32 result.
; NOTE(review): the declaration of @llvm.vector.reduce.add.v16i32 is not in the
; visible part of the file - presumably it follows this function; confirm.
%bin.rdx = add <16 x i32> %10, %9
%12 = tail call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %bin.rdx)
ret i32 %12
}
|