1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121
|
; Test of the loop vectorizer's cost modelling for loops with an uncountable
; early exit on AArch64. The vectorizer is run with its debug output enabled
; and that output (stderr redirected into the pipe) is what FileCheck matches;
; no IR is emitted (-disable-output).
; An assertions-enabled build is requested below, presumably because
; -debug-only is only available in such builds.
; REQUIRES: asserts
; RUN: opt -S < %s -p loop-vectorize -disable-output \
; RUN: -debug-only=loop-vectorize 2>&1 | FileCheck %s --check-prefixes=CHECK
; All functions in this file are compiled for AArch64 Linux.
target triple = "aarch64-unknown-linux-gnu"
; External helper with no body in this file. The tests call it on each stack
; buffer together with the buffer size in bytes; judging by the name it
; presumably initialises the memory - confirm against the definition.
declare void @init_mem(ptr, i64);
; Early-exit loop compiled with attribute group #1 (SVE enabled). The loop
; compares bytes of two 1024-byte stack buffers, starting at index 3, and
; leaves either as soon as a pair differs (early exit from %loop) or once
; %index.next reaches 67 (counted exit from %loop.inc), i.e. after at most 64
; iterations. Both exits branch to the same block, %loop.end, and the values
; that escape on the early exit are the pre-increment induction values.
define i64 @same_exit_block_pre_inc_use1_sve() #1 {
; Expected debug output: a scalable VF of vscale x 16 is selected, and the
; work in the vector.early.exit block is costed as two identical groups of
; first-active-lane (cost 4), add (cost 0) and DERIVED-IV (cost 0) -
; presumably one group per escaping induction value (%val1 and %val2).
; The minimum trip count for profitable runtime checks is reported as 16.
; CHECK-LABEL: LV: Checking a loop in 'same_exit_block_pre_inc_use1_sve'
; CHECK: LV: Selecting VF: vscale x 16
; CHECK: Calculating cost of work in exit block vector.early.exit
; CHECK-NEXT: Cost of 4 for VF vscale x 16: EMIT vp<{{.*}}> = first-active-lane ir<%cmp3>
; CHECK-NEXT: Cost of 0 for VF vscale x 16: EMIT vp<{{.*}}> = add
; CHECK-NEXT: Cost of 0 for VF vscale x 16: vp<{{.*}}> = DERIVED-IV
; CHECK-NEXT: Cost of 4 for VF vscale x 16: EMIT vp<{{.*}}> = first-active-lane ir<%cmp3>
; CHECK-NEXT: Cost of 0 for VF vscale x 16: EMIT vp<{{.*}}> = add
; CHECK-NEXT: Cost of 0 for VF vscale x 16: vp<{{.*}}> = DERIVED-IV
; CHECK: LV: Minimum required TC for runtime checks to be profitable:16
entry:
; Two local 1024-byte buffers, each handed to @init_mem with its size.
%p1 = alloca [1024 x i8]
%p2 = alloca [1024 x i8]
call void @init_mem(ptr %p1, i64 1024)
call void @init_mem(ptr %p2, i64 1024)
br label %loop
loop:
; %index steps by 1 from 3 and addresses both buffers; %index2 steps by 2
; from 15 and is only used as a second live-out value.
%index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ]
%index2 = phi i64 [ %index2.next, %loop.inc ], [ 15, %entry ]
%arrayidx = getelementptr inbounds i8, ptr %p1, i64 %index
%ld1 = load i8, ptr %arrayidx, align 1
%arrayidx1 = getelementptr inbounds i8, ptr %p2, i64 %index
%ld2 = load i8, ptr %arrayidx1, align 1
; Early exit: continue while the bytes are equal, leave on the first mismatch.
%cmp3 = icmp eq i8 %ld1, %ld2
br i1 %cmp3, label %loop.inc, label %loop.end
loop.inc:
%index.next = add i64 %index, 1
%index2.next = add i64 %index2, 2
; Counted exit: stay in the loop until %index.next equals 67.
%exitcond = icmp ne i64 %index.next, 67
br i1 %exitcond, label %loop, label %loop.end
loop.end:
; From the early exit the current (pre-increment) induction values are used;
; from the counted exit the constants 67 and 98 are used instead.
%val1 = phi i64 [ %index, %loop ], [ 67, %loop.inc ]
%val2 = phi i64 [ %index2, %loop ], [ 98, %loop.inc ]
%retval = add i64 %val1, %val2
ret i64 %retval
}
; Same loop body as @same_exit_block_pre_inc_use1_sve, but the function
; carries no attribute group, so SVE is not requested here. The loop compares
; bytes of two 1024-byte stack buffers from index 3 and exits to %loop.end
; either on the first mismatch or once %index.next reaches 67 (at most 64
; iterations).
define i64 @same_exit_block_pre_inc_use1_nosve() {
; Expected debug output: a fixed VF of 16 is selected and each
; first-active-lane in vector.early.exit is costed at 48 (add and DERIVED-IV
; are costed at 0). The minimum profitable trip count is then reported as
; 160, which exceeds the expected trip count of 64, so vectorization is
; reported as not beneficial, followed by the "Too many memory checks
; needed" message.
; CHECK-LABEL: LV: Checking a loop in 'same_exit_block_pre_inc_use1_nosve'
; CHECK: LV: Selecting VF: 16
; CHECK: Calculating cost of work in exit block vector.early.exit
; CHECK-NEXT: Cost of 48 for VF 16: EMIT vp<{{.*}}> = first-active-lane ir<%cmp3>
; CHECK-NEXT: Cost of 0 for VF 16: EMIT vp<{{.*}}> = add
; CHECK-NEXT: Cost of 0 for VF 16: vp<{{.*}}> = DERIVED-IV
; CHECK-NEXT: Cost of 48 for VF 16: EMIT vp<{{.*}}> = first-active-lane ir<%cmp3>
; CHECK-NEXT: Cost of 0 for VF 16: EMIT vp<{{.*}}> = add
; CHECK-NEXT: Cost of 0 for VF 16: vp<{{.*}}> = DERIVED-IV
; CHECK: LV: Minimum required TC for runtime checks to be profitable:160
; CHECK-NEXT: LV: Vectorization is not beneficial: expected trip count < minimum profitable VF (64 < 160)
; CHECK-NEXT: LV: Too many memory checks needed.
entry:
; Two local 1024-byte buffers, each handed to @init_mem with its size.
%p1 = alloca [1024 x i8]
%p2 = alloca [1024 x i8]
call void @init_mem(ptr %p1, i64 1024)
call void @init_mem(ptr %p2, i64 1024)
br label %loop
loop:
; %index steps by 1 from 3 and addresses both buffers; %index2 steps by 2
; from 15 and is only used as a second live-out value.
%index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ]
%index2 = phi i64 [ %index2.next, %loop.inc ], [ 15, %entry ]
%arrayidx = getelementptr inbounds i8, ptr %p1, i64 %index
%ld1 = load i8, ptr %arrayidx, align 1
%arrayidx1 = getelementptr inbounds i8, ptr %p2, i64 %index
%ld2 = load i8, ptr %arrayidx1, align 1
; Early exit: continue while the bytes are equal, leave on the first mismatch.
%cmp3 = icmp eq i8 %ld1, %ld2
br i1 %cmp3, label %loop.inc, label %loop.end
loop.inc:
%index.next = add i64 %index, 1
%index2.next = add i64 %index2, 2
; Counted exit: stay in the loop until %index.next equals 67.
%exitcond = icmp ne i64 %index.next, 67
br i1 %exitcond, label %loop, label %loop.end
loop.end:
; From the early exit the current (pre-increment) induction values are used;
; from the counted exit the constants 67 and 98 are used instead.
%val1 = phi i64 [ %index, %loop ], [ 67, %loop.inc ]
%val2 = phi i64 [ %index2, %loop ], [ 98, %loop.inc ]
%retval = add i64 %val1, %val2
ret i64 %retval
}
; Early-exit loop whose exit condition is a trunc of a loaded i64 to i1.
; It reads up to 100 consecutive i64 values from %src (800 bytes, matching the
; dereferenceable(800) annotation) and returns the first loaded value whose
; truncation to i1 is true, or 0 if all 100 iterations complete.
; Going by the function name, the trunc is presumably what makes the vector
; form unprofitable - the expectations below only establish that VF 1 wins.
define i64 @vectorization_not_profitable_due_to_trunc(ptr dereferenceable(800) %src) {
; Expected debug output: the scalar VF (1) is selected, the exit-block cost
; header is printed with no recipe costs following it, and the vectorizer
; reports that vectorization is possible but not beneficial.
; CHECK-LABEL: LV: Checking a loop in 'vectorization_not_profitable_due_to_trunc'
; CHECK: LV: Selecting VF: 1.
; CHECK-NEXT: Calculating cost of work in exit block vector.early.exit:
; CHECK-NEXT: LV: Vectorization is possible but not beneficial.
entry:
br label %loop.header
loop.header:
%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ]
%gep.src = getelementptr inbounds i64, ptr %src, i64 %iv
; The i64 load is only declared 1-byte aligned.
%l = load i64, ptr %gep.src, align 1
; Early exit: taken when the truncation of the loaded value to i1 is true.
%t = trunc i64 %l to i1
br i1 %t, label %exit.0, label %loop.latch
loop.latch:
%iv.next = add i64 %iv, 1
; Counted exit after 100 iterations.
%ec = icmp eq i64 %iv.next, 100
br i1 %ec, label %exit.1, label %loop.header
exit.0:
; Early-exit path: return the loaded value that triggered the exit.
%res = phi i64 [ %l, %loop.header ]
ret i64 %res
exit.1:
; Counted-exit path: nothing matched, return 0.
ret i64 0
}
; Attribute group referenced by @same_exit_block_pre_inc_use1_sve: enables the
; SVE target feature and constrains vscale to the range [1, 16].
attributes #1 = { "target-features"="+sve" vscale_range(1,16) }
|