; This test contains extremely tricky call graph structures for the inliner to
; handle correctly. They form cycles where the inliner introduces code that is
; immediately or can eventually be transformed back into the original code. And
; each step changes the call graph and so will trigger iteration. This requires
; some out-of-band way to prevent infinitely re-inlining and re-transforming the
; code.
;
; RUN: opt < %s -passes='cgscc(inline,function(sroa,instcombine))' -inline-threshold=50 -S | FileCheck %s
; The `test1_*` collection of functions form a directly cycling pattern.
define void @test1_a(i8** %ptr) {
; CHECK-LABEL: define void @test1_a(
entry:
; Seed the cycle: pass @test1_b its own address, so that after inlining and
; simplification the indirect call inside @test1_b becomes this exact direct
; call again. The final i32 argument counts completed inlining rounds,
; starting at 0.
call void @test1_b(i8* bitcast (void (i8*, i1, i32)* @test1_b to i8*), i1 false, i32 0)
; Inlining and simplifying this call will reliably produce the exact same call,
; over and over again. However, each inlining increments the count, and so we
; expect this test case to stop after one round of inlining with a final
; argument of '1'.
; CHECK-NOT: call
; CHECK: call void @test1_b(i8* bitcast (void (i8*, i1, i32)* @test1_b to i8*), i1 false, i32 1)
; CHECK-NOT: call
ret void
}
define void @test1_b(i8* %arg, i1 %flag, i32 %inline_count) {
; CHECK-LABEL: define void @test1_b(
entry:
%a = alloca i8*
store i8* %arg, i8** %a
; This alloca and store should remain through any optimization.
; CHECK: %[[A:.*]] = alloca
; CHECK: store i8* %arg, i8** %[[A]]
br i1 %flag, label %bb1, label %bb2
bb1:
; Dead when reached from @test1_a (it passes %flag = false); the noinline
; keeps this back edge to @test1_a present in the call graph.
call void @test1_a(i8** %a) noinline
br label %bb2
bb2:
; Reload the function pointer stored into %a above and call it indirectly.
; When @test1_a passes @test1_b's own address, SROA + instcombine can fold
; this back into a direct recursive call, re-creating the cycle.
%cast = bitcast i8** %a to void (i8*, i1, i32)**
%p = load void (i8*, i1, i32)*, void (i8*, i1, i32)** %cast
%inline_count_inc = add i32 %inline_count, 1
call void %p(i8* %arg, i1 %flag, i32 %inline_count_inc)
; And we should continue to load and call indirectly through optimization.
; CHECK: %[[CAST:.*]] = bitcast i8** %[[A]] to void (i8*, i1, i32)**
; CHECK: %[[P:.*]] = load void (i8*, i1, i32)*, void (i8*, i1, i32)** %[[CAST]]
; CHECK: call void %[[P]](
ret void
}
define void @test2_a(i8** %ptr) {
; CHECK-LABEL: define void @test2_a(
entry:
; Seed a two-step cycle: @test2_b receives the addresses of both @test2_b and
; @test2_c, so inlining alternates between the two bodies before re-creating
; this exact call. The final i32 argument counts completed inlining rounds.
call void @test2_b(i8* bitcast (void (i8*, i8*, i1, i32)* @test2_b to i8*), i8* bitcast (void (i8*, i8*, i1, i32)* @test2_c to i8*), i1 false, i32 0)
; Inlining and simplifying this call will reliably produce the exact same call,
; but only after doing two rounds of inlining, first from @test2_b then
; @test2_c. We check the exact number of inlining rounds before we cut off to
; break the cycle by inspecting the last parameter that gets incremented with
; each inlined function body.
; CHECK-NOT: call
; CHECK: call void @test2_b(i8* bitcast (void (i8*, i8*, i1, i32)* @test2_b to i8*), i8* bitcast (void (i8*, i8*, i1, i32)* @test2_c to i8*), i1 false, i32 2)
; CHECK-NOT: call
ret void
}
define void @test2_b(i8* %arg1, i8* %arg2, i1 %flag, i32 %inline_count) {
; CHECK-LABEL: define void @test2_b(
entry:
; Stash %arg2 (the address of @test2_c, as called from @test2_a) so the
; indirect call below resolves to @test2_c after SROA + instcombine.
%a = alloca i8*
store i8* %arg2, i8** %a
; This alloca and store should remain through any optimization.
; CHECK: %[[A:.*]] = alloca
; CHECK: store i8* %arg2, i8** %[[A]]
br i1 %flag, label %bb1, label %bb2
bb1:
; Dead when reached from @test2_a (it passes %flag = false); the noinline
; keeps this back edge to @test2_a present in the call graph.
call void @test2_a(i8** %a) noinline
br label %bb2
bb2:
%p = load i8*, i8** %a
%cast = bitcast i8* %p to void (i8*, i8*, i1, i32)*
%inline_count_inc = add i32 %inline_count, 1
call void %cast(i8* %arg1, i8* %arg2, i1 %flag, i32 %inline_count_inc)
; And we should continue to load and call indirectly through optimization.
; CHECK: %[[CAST:.*]] = bitcast i8** %[[A]] to void (i8*, i8*, i1, i32)**
; CHECK: %[[P:.*]] = load void (i8*, i8*, i1, i32)*, void (i8*, i8*, i1, i32)** %[[CAST]]
; CHECK: call void %[[P]](
ret void
}
define void @test2_c(i8* %arg1, i8* %arg2, i1 %flag, i32 %inline_count) {
; CHECK-LABEL: define void @test2_c(
entry:
; Mirror of @test2_b, but stashes %arg1 (the address of @test2_b) instead,
; so the indirect call below resolves back to @test2_b, closing the cycle.
%a = alloca i8*
store i8* %arg1, i8** %a
; This alloca and store should remain through any optimization.
; CHECK: %[[A:.*]] = alloca
; CHECK: store i8* %arg1, i8** %[[A]]
br i1 %flag, label %bb1, label %bb2
bb1:
; Dead when reached with %flag = false; the noinline keeps this back edge to
; @test2_a present in the call graph.
call void @test2_a(i8** %a) noinline
br label %bb2
bb2:
%p = load i8*, i8** %a
%cast = bitcast i8* %p to void (i8*, i8*, i1, i32)*
%inline_count_inc = add i32 %inline_count, 1
call void %cast(i8* %arg1, i8* %arg2, i1 %flag, i32 %inline_count_inc)
; And we should continue to load and call indirectly through optimization.
; CHECK: %[[CAST:.*]] = bitcast i8** %[[A]] to void (i8*, i8*, i1, i32)**
; CHECK: %[[P:.*]] = load void (i8*, i8*, i1, i32)*, void (i8*, i8*, i1, i32)** %[[CAST]]
; CHECK: call void %[[P]](
ret void
}
; Another infinite inlining case. The initial callgraph is like following:
;
; test3_a <---> test3_b
; | ^
; v |
; test3_c <---> test3_d
;
; For all the call edges in the call graph, only test3_c and test3_d can be
; inlined into test3_a, and no other call edge can be inlined.
;
; After test3_c is inlined into test3_a, the original call edge test3_a->test3_c
; will be removed, a new call edge will be added and the call graph becomes:
;
; test3_a <---> test3_b
; \ ^
; v /
; test3_c <---> test3_d
; But test3_a, test3_b, test3_c and test3_d still belong to the same SCC.
;
; Then after test3_a->test3_d is inlined, when test3_a->test3_d is converted to
; a ref edge, the original SCC will be split into two: {test3_c, test3_d} and
; {test3_a, test3_b}, immediately after the newly added ref edge
; test3_a->test3_c will be converted to a call edge, and the two SCCs will be
; merged into the original one again. During this cycle, the original SCC will
; be added into UR.CWorklist again and this creates an infinite loop.
@a = global i64 0
@b = global i64 0
; Check test3_c is inlined into test3_a once and only once.
; CHECK-LABEL: @test3_a(
; CHECK: tail call void @test3_b()
; CHECK-NEXT: tail call void @test3_d(i32 5)
; CHECK-NEXT: %[[LD1:.*]] = load i64, i64* @a
; CHECK-NEXT: %[[ADD1:.*]] = add nsw i64 %[[LD1]], 1
; CHECK-NEXT: store i64 %[[ADD1]], i64* @a
; CHECK-NEXT: %[[LD2:.*]] = load i64, i64* @b
; CHECK-NEXT: %[[ADD2:.*]] = add nsw i64 %[[LD2]], 5
; CHECK-NEXT: store i64 %[[ADD2]], i64* @b
; CHECK-NEXT: ret void
; Function Attrs: noinline
define void @test3_a() #0 {
entry:
tail call void @test3_b()
; This is the only inlinable edge at the start (see the comment block above):
; @test3_c's body is expected to be inlined here exactly once.
tail call void @test3_c(i32 5)
%t0 = load i64, i64* @b
%add = add nsw i64 %t0, 5
store i64 %add, i64* @b
ret void
}
; Function Attrs: noinline
; Mutually recursive with @test3_a; noinline (#0) keeps the
; test3_a <-> test3_b edge from being collapsed by the inliner.
define void @test3_b() #0 {
entry:
tail call void @test3_a()
%t0 = load i64, i64* @a
%add = add nsw i64 %t0, 2
store i64 %add, i64* @a
ret void
}
; Mutually recursive with @test3_c; also calls back into @test3_b, keeping
; all four test3_* functions in a single SCC initially. The @random call in
; the %i != 5 path makes that branch side-effecting and non-foldable.
define void @test3_d(i32 %i) {
entry:
%cmp = icmp eq i32 %i, 5
br i1 %cmp, label %if.end, label %if.then
if.then: ; preds = %entry
%call = tail call i64 @random()
%t0 = load i64, i64* @a
%add = add nsw i64 %t0, %call
store i64 %add, i64* @a
br label %if.end
if.end: ; preds = %entry, %if.then
tail call void @test3_c(i32 %i)
tail call void @test3_b()
%t6 = load i64, i64* @a
%add79 = add nsw i64 %t6, 3
store i64 %add79, i64* @a
ret void
}
; Mutually recursive with @test3_d. When inlined into @test3_a with the
; constant argument 5, the %i != 5 branch folds away, leaving only the
; @test3_d call and the update of @a (matched by the CHECK block above
; @test3_a).
define void @test3_c(i32 %i) {
entry:
%cmp = icmp eq i32 %i, 5
br i1 %cmp, label %if.end, label %if.then
if.then: ; preds = %entry
%call = tail call i64 @random()
%t0 = load i64, i64* @a
%add = add nsw i64 %t0, %call
store i64 %add, i64* @a
br label %if.end
if.end: ; preds = %entry, %if.then
tail call void @test3_d(i32 %i)
%t6 = load i64, i64* @a
%add85 = add nsw i64 %t6, 1
store i64 %add85, i64* @a
ret void
}
declare i64 @random()
attributes #0 = { noinline }