File: early_exit_costs.ll

package info (click to toggle)
llvm-toolchain-21 1%3A21.1.6-3
  • links: PTS, VCS
  • area: main
  • in suites: sid
  • size: 2,245,028 kB
  • sloc: cpp: 7,619,726; ansic: 1,434,018; asm: 1,058,748; python: 252,740; f90: 94,671; objc: 70,685; lisp: 42,813; pascal: 18,401; sh: 8,601; ml: 5,111; perl: 4,720; makefile: 3,675; awk: 3,523; javascript: 2,409; xml: 892; fortran: 770
file content (121 lines) | stat: -rw-r--r-- 4,559 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
; REQUIRES: asserts
; RUN: opt -S < %s -p loop-vectorize -disable-output \
; RUN:   -debug-only=loop-vectorize 2>&1 | FileCheck %s --check-prefixes=CHECK

; The opt invocation above runs only the loop-vectorize pass, discards the
; transformed IR (-disable-output) and redirects the pass's debug log from
; stderr into FileCheck. The directives in this file therefore match debug-log
; text (VF selection, per-recipe costs in the early-exit block, profitability
; messages), not output IR. The debug log needs an assertions-enabled build,
; hence the asserts requirement on the first line.
target triple = "aarch64-unknown-linux-gnu"

; External helper, declared only. The tests call it on each stack buffer
; before entering the loop; its behaviour is not visible in this file
; (presumably it fills the buffer -- confirm against the definition).
declare void @init_mem(ptr, i64);

; SVE variant of the early-exit loop. Attribute group #1 (defined at the end of
; this file) enables "+sve" with vscale_range(1,16).
;
; The loop compares p1[i] and p2[i] byte by byte for i = 3..66 (at most 64
; iterations) and leaves early on the first mismatch. Both exits branch to the
; same block, whose phis use the pre-increment induction values.
;
; Expected debug output: a scalable VF of vscale x 16 is selected, each
; first-active-lane recipe in vector.early.exit costs 4, and the minimum
; profitable trip count for runtime checks is 16. The recipe triple
; (first-active-lane, add, DERIVED-IV) is expected twice -- presumably once per
; live-out phi (%val1, %val2); confirm against the printed VPlan.
define i64 @same_exit_block_pre_inc_use1_sve() #1 {
; CHECK-LABEL: LV: Checking a loop in 'same_exit_block_pre_inc_use1_sve'
; CHECK: LV: Selecting VF: vscale x 16
; CHECK: Calculating cost of work in exit block vector.early.exit
; CHECK-NEXT: Cost of 4 for VF vscale x 16: EMIT vp<{{.*}}> = first-active-lane ir<%cmp3>
; CHECK-NEXT: Cost of 0 for VF vscale x 16: EMIT vp<{{.*}}> = add
; CHECK-NEXT: Cost of 0 for VF vscale x 16: vp<{{.*}}> = DERIVED-IV
; CHECK-NEXT: Cost of 4 for VF vscale x 16: EMIT vp<{{.*}}> = first-active-lane ir<%cmp3>
; CHECK-NEXT: Cost of 0 for VF vscale x 16: EMIT vp<{{.*}}> = add
; CHECK-NEXT: Cost of 0 for VF vscale x 16: vp<{{.*}}> = DERIVED-IV
; CHECK: LV: Minimum required TC for runtime checks to be profitable:16
entry:
  ; Two 1024-byte stack buffers, each passed to @init_mem before the loop.
  %p1 = alloca [1024 x i8]
  %p2 = alloca [1024 x i8]
  call void @init_mem(ptr %p1, i64 1024)
  call void @init_mem(ptr %p2, i64 1024)
  br label %loop

loop:
  ; %index starts at 3 and steps by 1; %index2 starts at 15 and steps by 2.
  ; Only %index is used for addressing; %index2 is a second live-out IV.
  %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ]
  %index2 = phi i64 [ %index2.next, %loop.inc ], [ 15, %entry ]
  %arrayidx = getelementptr inbounds i8, ptr %p1, i64 %index
  %ld1 = load i8, ptr %arrayidx, align 1
  %arrayidx1 = getelementptr inbounds i8, ptr %p2, i64 %index
  %ld2 = load i8, ptr %arrayidx1, align 1
  ; Early exit: continue only while the two bytes are equal.
  %cmp3 = icmp eq i8 %ld1, %ld2
  br i1 %cmp3, label %loop.inc, label %loop.end

loop.inc:
  ; Latch: advance both IVs and leave once %index.next reaches 67.
  %index.next = add i64 %index, 1
  %index2.next = add i64 %index2, 2
  %exitcond = icmp ne i64 %index.next, 67
  br i1 %exitcond, label %loop, label %loop.end

loop.end:
  ; Shared exit block: on the early exit the phis take the pre-increment IV
  ; values, on the latch exit they take the constants 67 and 98.
  %val1 = phi i64 [ %index, %loop ], [ 67, %loop.inc ]
  %val2 = phi i64 [ %index2, %loop ], [ 98, %loop.inc ]
  %retval = add i64 %val1, %val2
  ret i64 %retval
}

; Same loop as the _sve function but without any attribute group, so only the
; default features of the aarch64 target triple apply.
;
; Expected debug output: a fixed VF of 16 is selected, each first-active-lane
; recipe in vector.early.exit costs 48, and the minimum profitable trip count
; for runtime checks is 160. Since the loop runs at most 64 iterations
; (index 3..66), the log must then report that vectorization is not beneficial
; (64 < 160), immediately followed by the "Too many memory checks needed."
; message.
define i64 @same_exit_block_pre_inc_use1_nosve() {
; CHECK-LABEL: LV: Checking a loop in 'same_exit_block_pre_inc_use1_nosve'
; CHECK: LV: Selecting VF: 16
; CHECK: Calculating cost of work in exit block vector.early.exit
; CHECK-NEXT: Cost of 48 for VF 16: EMIT vp<{{.*}}> = first-active-lane ir<%cmp3>
; CHECK-NEXT: Cost of 0 for VF 16: EMIT vp<{{.*}}> = add
; CHECK-NEXT: Cost of 0 for VF 16: vp<{{.*}}> = DERIVED-IV
; CHECK-NEXT: Cost of 48 for VF 16: EMIT vp<{{.*}}> = first-active-lane ir<%cmp3>
; CHECK-NEXT: Cost of 0 for VF 16: EMIT vp<{{.*}}> = add
; CHECK-NEXT: Cost of 0 for VF 16: vp<{{.*}}> = DERIVED-IV
; CHECK: LV: Minimum required TC for runtime checks to be profitable:160
; CHECK-NEXT: LV: Vectorization is not beneficial: expected trip count < minimum profitable VF (64 < 160)
; CHECK-NEXT: LV: Too many memory checks needed.
entry:
  ; Two 1024-byte stack buffers, each passed to @init_mem before the loop.
  %p1 = alloca [1024 x i8]
  %p2 = alloca [1024 x i8]
  call void @init_mem(ptr %p1, i64 1024)
  call void @init_mem(ptr %p2, i64 1024)
  br label %loop

loop:
  ; %index starts at 3 and steps by 1; %index2 starts at 15 and steps by 2.
  ; Only %index is used for addressing; %index2 is a second live-out IV.
  %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ]
  %index2 = phi i64 [ %index2.next, %loop.inc ], [ 15, %entry ]
  %arrayidx = getelementptr inbounds i8, ptr %p1, i64 %index
  %ld1 = load i8, ptr %arrayidx, align 1
  %arrayidx1 = getelementptr inbounds i8, ptr %p2, i64 %index
  %ld2 = load i8, ptr %arrayidx1, align 1
  ; Early exit: continue only while the two bytes are equal.
  %cmp3 = icmp eq i8 %ld1, %ld2
  br i1 %cmp3, label %loop.inc, label %loop.end

loop.inc:
  ; Latch: advance both IVs and leave once %index.next reaches 67.
  %index.next = add i64 %index, 1
  %index2.next = add i64 %index2, 2
  %exitcond = icmp ne i64 %index.next, 67
  br i1 %exitcond, label %loop, label %loop.end

loop.end:
  ; Shared exit block: on the early exit the phis take the pre-increment IV
  ; values, on the latch exit they take the constants 67 and 98.
  %val1 = phi i64 [ %index, %loop ], [ 67, %loop.inc ]
  %val2 = phi i64 [ %index2, %loop ], [ 98, %loop.inc ]
  %retval = add i64 %val1, %val2
  ret i64 %retval
}

; Early-exit loop whose exit condition is a trunc of a loaded i64 to i1.
; %src is dereferenceable for 800 bytes, which covers the 100 i64 elements the
; loop can read (iv = 0..99).
;
; Expected debug output: VF 1 is selected, the exit-block cost header is
; printed with no recipe cost lines after it, and the very next line reports
; that vectorization is possible but not beneficial. Judging by the function
; name, the trunc is what makes the vector form too costly -- confirm against
; the full cost log.
define i64 @vectorization_not_profitable_due_to_trunc(ptr dereferenceable(800) %src) {
; CHECK-LABEL: LV: Checking a loop in 'vectorization_not_profitable_due_to_trunc'
; CHECK: LV: Selecting VF: 1.
; CHECK-NEXT: Calculating cost of work in exit block vector.early.exit:
; CHECK-NEXT: LV: Vectorization is possible but not beneficial.
entry:
  br label %loop.header

loop.header:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ]
  %gep.src = getelementptr inbounds i64, ptr %src, i64 %iv
  %l = load i64, ptr %gep.src, align 1
  ; Early exit: taken when the low bit of the loaded value is set.
  %t = trunc i64 %l to i1
  br i1 %t, label %exit.0, label %loop.latch

loop.latch:
  ; Latch exit after 100 iterations.
  %iv.next = add i64 %iv, 1
  %ec = icmp eq i64 %iv.next, 100
  br i1 %ec, label %exit.1, label %loop.header

exit.0:
  ; Early-exit path returns the loaded value that triggered the exit.
  %res = phi i64 [ %l, %loop.header ]
  ret i64 %res

exit.1:
  ; No element had its low bit set: return 0.
  ret i64 0
}

; Attribute group referenced by @same_exit_block_pre_inc_use1_sve: enables the
; SVE target feature and bounds vscale to the range [1, 16].
attributes #1 = { "target-features"="+sve" vscale_range(1,16) }