1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342
|
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -mtriple=armv7a-none-eabi %s -o - | FileCheck %s
declare i32 @many_args_callee(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5)
; Caller and callee have the same six-i32 signature. The expected output
; writes the fifth and sixth arguments to [sp] and [sp, #4] and then branches
; directly to the callee.
define i32 @many_args_tail(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5) {
; CHECK-LABEL: many_args_tail:
; CHECK: @ %bb.0:
; CHECK-NEXT: mov r0, #5
; CHECK-NEXT: mov r1, #2
; CHECK-NEXT: str r0, [sp]
; CHECK-NEXT: mov r0, #6
; CHECK-NEXT: str r0, [sp, #4]
; CHECK-NEXT: mov r0, #1
; CHECK-NEXT: mov r2, #3
; CHECK-NEXT: mov r3, #4
; CHECK-NEXT: b many_args_callee
  %call = tail call i32 @many_args_callee(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6)
  ret i32 %call
}
; Same shape as @many_args_tail, but the call is marked musttail. The expected
; output is identical: two stores to the stack argument slots followed by a
; direct branch.
define i32 @many_args_musttail(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5) {
; CHECK-LABEL: many_args_musttail:
; CHECK: @ %bb.0:
; CHECK-NEXT: mov r0, #5
; CHECK-NEXT: mov r1, #2
; CHECK-NEXT: str r0, [sp]
; CHECK-NEXT: mov r0, #6
; CHECK-NEXT: str r0, [sp, #4]
; CHECK-NEXT: mov r0, #1
; CHECK-NEXT: mov r2, #3
; CHECK-NEXT: mov r3, #4
; CHECK-NEXT: b many_args_callee
  %call = musttail call i32 @many_args_callee(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6)
  ret i32 %call
}
; This function has more arguments than its tail-callee. This isn't valid for
; the musttail attribute, but can still be tail-called as a non-guaranteed
; optimisation, because the outgoing arguments to @many_args_callee fit in the
; stack space allocated by the caller of @more_args_tail.
define i32 @more_args_tail(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, i32 %6) {
; CHECK-LABEL: more_args_tail:
; CHECK: @ %bb.0:
; CHECK-NEXT: mov r0, #5
; CHECK-NEXT: mov r1, #2
; CHECK-NEXT: str r0, [sp]
; CHECK-NEXT: mov r0, #6
; CHECK-NEXT: str r0, [sp, #4]
; CHECK-NEXT: mov r0, #1
; CHECK-NEXT: mov r2, #3
; CHECK-NEXT: mov r3, #4
; CHECK-NEXT: b many_args_callee
  ; Seven incoming i32 arguments, six outgoing ones.
  %call = tail call i32 @many_args_callee(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6)
  ret i32 %call
}
; Again, this isn't valid for musttail, but can be tail-called in practice
; because the stack size is the same.
define i32 @different_args_tail(i64 %0, i64 %1, i64 %2) {
; CHECK-LABEL: different_args_tail:
; CHECK: @ %bb.0:
; CHECK-NEXT: mov r0, #5
; CHECK-NEXT: mov r1, #2
; CHECK-NEXT: str r0, [sp]
; CHECK-NEXT: mov r0, #6
; CHECK-NEXT: str r0, [sp, #4]
; CHECK-NEXT: mov r0, #1
; CHECK-NEXT: mov r2, #3
; CHECK-NEXT: mov r3, #4
; CHECK-NEXT: b many_args_callee
  ; The caller takes three i64 values while the callee takes six i32 values.
  %call = tail call i32 @many_args_callee(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6)
  ret i32 %call
}
; Here, the caller requires less stack space for its arguments than the
; callee, so it would not be valid to do a tail-call.
define i32 @fewer_args_tail(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4) {
; CHECK-LABEL: fewer_args_tail:
; CHECK: @ %bb.0:
; CHECK-NEXT: .save {r11, lr}
; CHECK-NEXT: push {r11, lr}
; CHECK-NEXT: .pad #8
; CHECK-NEXT: sub sp, sp, #8
; CHECK-NEXT: mov r1, #6
; CHECK-NEXT: mov r0, #5
; CHECK-NEXT: strd r0, r1, [sp]
; CHECK-NEXT: mov r0, #1
; CHECK-NEXT: mov r1, #2
; CHECK-NEXT: mov r2, #3
; CHECK-NEXT: mov r3, #4
; CHECK-NEXT: bl many_args_callee
; CHECK-NEXT: add sp, sp, #8
; CHECK-NEXT: pop {r11, pc}
  ; Although marked tail, the expected output is an ordinary bl: the two stack
  ; arguments are written to an 8-byte area allocated in this function's frame.
  %call = tail call i32 @many_args_callee(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6)
  ret i32 %call
}
declare void @sret_callee(ptr sret({ double, double }) align 8)
; Functions which return by sret can be tail-called because the incoming sret
; pointer gets passed through to the callee.
define void @sret_caller_tail(ptr sret({ double, double }) align 8 %result) {
; CHECK-LABEL: sret_caller_tail:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: b sret_callee
entry:
; The incoming sret pointer %result is forwarded unchanged as the callee's
; sret argument, so the expected output is a single branch.
tail call void @sret_callee(ptr sret({ double, double }) align 8 %result)
ret void
}
; Same as @sret_caller_tail, but with the call marked musttail. The expected
; output is again a single branch to sret_callee.
define void @sret_caller_musttail(ptr sret({ double, double }) align 8 %result) {
; CHECK-LABEL: sret_caller_musttail:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: b sret_callee
entry:
; %result is passed straight through as the callee's sret pointer.
musttail call void @sret_callee(ptr sret({ double, double }) align 8 %result)
ret void
}
; Clang only uses byval for arguments of 65 bytes or larger, but we test with a
; 20 byte struct to keep the tests more readable. This size was chosen to still
; make sure that it will be split between registers and the stack, to test all
; of the interesting code paths in the backend.
%twenty_bytes = type { [5 x i32] }
; External callee taking one %twenty_bytes struct by value.
declare void @large_callee(ptr byval(%twenty_bytes) align 4)
; Functions with byval parameters can be tail-called, because the value is
; actually passed in registers and the stack in the same way for the caller and
; callee. Within @large_caller the first 16 bytes of the argument are spilled
; to the local stack frame, but for the tail-call they are passed in r0-r3, so
; it's safe to de-allocate that memory before the call.
; TODO: The SUB and STM instructions are unnecessary and could be optimised
; out, but the behaviour of this is still correct.
define void @large_caller(ptr byval(%twenty_bytes) align 4 %a) {
; CHECK-LABEL: large_caller:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .pad #16
; CHECK-NEXT: sub sp, sp, #16
; CHECK-NEXT: stm sp!, {r0, r1, r2, r3}
; CHECK-NEXT: b large_callee
entry:
  ; Forward the incoming byval argument unchanged to a callee with the same
  ; signature.
  musttail call void @large_callee(ptr byval(%twenty_bytes) align 4 %a)
  ret void
}
; As above, but with some inline asm to test that the arguments in r0-r3 are
; re-loaded before the call.
define void @large_caller_check_regs(ptr byval(%twenty_bytes) align 4 %a) {
; CHECK-LABEL: large_caller_check_regs:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .pad #16
; CHECK-NEXT: sub sp, sp, #16
; CHECK-NEXT: stm sp, {r0, r1, r2, r3}
; CHECK-NEXT: @APP
; CHECK-NEXT: @NO_APP
; CHECK-NEXT: pop {r0, r1, r2, r3}
; CHECK-NEXT: b large_callee
entry:
  ; Empty inline asm that declares r0-r3 as clobbered, so the register part of
  ; %a cannot simply stay live in those registers across it.
  tail call void asm sideeffect "", "~{r0},~{r1},~{r2},~{r3}"()
  musttail call void @large_callee(ptr byval(%twenty_bytes) align 4 %a)
  ret void
}
; The IR for this one looks dodgy, because it has an alloca passed to a
; musttail function, but it is passed as a byval argument, so will be copied
; into the stack space allocated by @large_caller_new_value's caller, so is
; valid.
define void @large_caller_new_value(ptr byval(%twenty_bytes) align 4 %a) {
; CHECK-LABEL: large_caller_new_value:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .pad #36
; CHECK-NEXT: sub sp, sp, #36
; CHECK-NEXT: add r12, sp, #20
; CHECK-NEXT: stm r12, {r0, r1, r2, r3}
; CHECK-NEXT: mov r0, #4
; CHECK-NEXT: add r1, sp, #36
; CHECK-NEXT: str r0, [sp, #16]
; CHECK-NEXT: mov r0, #3
; CHECK-NEXT: str r0, [sp, #12]
; CHECK-NEXT: mov r0, #2
; CHECK-NEXT: str r0, [sp, #8]
; CHECK-NEXT: mov r0, #1
; CHECK-NEXT: str r0, [sp, #4]
; CHECK-NEXT: mov r0, #0
; CHECK-NEXT: str r0, [sp]
; CHECK-NEXT: mov r0, sp
; CHECK-NEXT: add r0, r0, #16
; CHECK-NEXT: mov r3, #3
; CHECK-NEXT: ldr r2, [r0], #4
; CHECK-NEXT: str r2, [r1], #4
; CHECK-NEXT: mov r0, #0
; CHECK-NEXT: mov r1, #1
; CHECK-NEXT: mov r2, #2
; CHECK-NEXT: add sp, sp, #36
; CHECK-NEXT: b large_callee
entry:
  ; Build a fresh struct { 0, 1, 2, 3, 4 } in a local alloca, one i32 at a
  ; time at byte offsets 0, 4, 8, 12 and 16.
  %y = alloca %twenty_bytes, align 4
  store i32 0, ptr %y, align 4
  %0 = getelementptr inbounds i8, ptr %y, i32 4
  store i32 1, ptr %0, align 4
  %1 = getelementptr inbounds i8, ptr %y, i32 8
  store i32 2, ptr %1, align 4
  %2 = getelementptr inbounds i8, ptr %y, i32 12
  store i32 3, ptr %2, align 4
  %3 = getelementptr inbounds i8, ptr %y, i32 16
  store i32 4, ptr %3, align 4
  ; Pass the local struct by value; the incoming %a is not used.
  musttail call void @large_callee(ptr byval(%twenty_bytes) align 4 %y)
  ret void
}
declare void @two_byvals_callee(ptr byval(%twenty_bytes) align 4, ptr byval(%twenty_bytes) align 4)
; Forwards the two incoming byval arguments to the callee in swapped order.
; In the expected output both arguments are first copied into a 40-byte local
; area and only then written to the registers and outgoing argument slots.
define void @swap_byvals(ptr byval(%twenty_bytes) align 4 %a, ptr byval(%twenty_bytes) align 4 %b) {
; CHECK-LABEL: swap_byvals:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .pad #16
; CHECK-NEXT: sub sp, sp, #16
; CHECK-NEXT: .save {r4, r5, r11, lr}
; CHECK-NEXT: push {r4, r5, r11, lr}
; CHECK-NEXT: .pad #40
; CHECK-NEXT: sub sp, sp, #40
; CHECK-NEXT: add r12, sp, #56
; CHECK-NEXT: add lr, sp, #20
; CHECK-NEXT: stm r12, {r0, r1, r2, r3}
; CHECK-NEXT: add r0, sp, #56
; CHECK-NEXT: mov r12, sp
; CHECK-NEXT: ldr r1, [r0], #4
; CHECK-NEXT: mov r2, r12
; CHECK-NEXT: str r1, [r2], #4
; CHECK-NEXT: add r3, sp, #20
; CHECK-NEXT: ldr r1, [r0], #4
; CHECK-NEXT: add r4, sp, #76
; CHECK-NEXT: str r1, [r2], #4
; CHECK-NEXT: ldr r1, [r0], #4
; CHECK-NEXT: str r1, [r2], #4
; CHECK-NEXT: ldr r1, [r0], #4
; CHECK-NEXT: str r1, [r2], #4
; CHECK-NEXT: ldr r1, [r0], #4
; CHECK-NEXT: add r0, sp, #76
; CHECK-NEXT: str r1, [r2], #4
; CHECK-NEXT: mov r2, lr
; CHECK-NEXT: ldr r1, [r0], #4
; CHECK-NEXT: str r1, [r2], #4
; CHECK-NEXT: ldr r1, [r0], #4
; CHECK-NEXT: str r1, [r2], #4
; CHECK-NEXT: ldr r1, [r0], #4
; CHECK-NEXT: str r1, [r2], #4
; CHECK-NEXT: ldr r1, [r0], #4
; CHECK-NEXT: str r1, [r2], #4
; CHECK-NEXT: ldr r1, [r0], #4
; CHECK-NEXT: str r1, [r2], #4
; CHECK-NEXT: ldm r3, {r0, r1, r2, r3}
; CHECK-NEXT: ldr r5, [r12], #4
; CHECK-NEXT: str r5, [r4], #4
; CHECK-NEXT: ldr r5, [r12], #4
; CHECK-NEXT: str r5, [r4], #4
; CHECK-NEXT: ldr r5, [r12], #4
; CHECK-NEXT: str r5, [r4], #4
; CHECK-NEXT: ldr r5, [r12], #4
; CHECK-NEXT: str r5, [r4], #4
; CHECK-NEXT: ldr r5, [r12], #4
; CHECK-NEXT: str r5, [r4], #4
; CHECK-NEXT: add r5, lr, #16
; CHECK-NEXT: add r12, sp, #72
; CHECK-NEXT: ldr r4, [r5], #4
; CHECK-NEXT: str r4, [r12], #4
; CHECK-NEXT: add sp, sp, #40
; CHECK-NEXT: pop {r4, r5, r11, lr}
; CHECK-NEXT: add sp, sp, #16
; CHECK-NEXT: b two_byvals_callee
entry:
  ; Note the argument order: %b is passed first and %a second.
  musttail call void @two_byvals_callee(ptr byval(%twenty_bytes) align 4 %b, ptr byval(%twenty_bytes) align 4 %a)
  ret void
}
; A forwarded byval arg, but at a different offset on the stack, so it needs to
; be copied to the local stack frame first. This can't be musttail because of
; the different signatures, but is still tail-called as an optimisation.
declare void @shift_byval_callee(ptr byval(%twenty_bytes) align 4)
define void @shift_byval(i32 %a, ptr byval(%twenty_bytes) align 4 %b) {
; CHECK-LABEL: shift_byval:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .pad #12
; CHECK-NEXT: sub sp, sp, #12
; CHECK-NEXT: .save {r4, lr}
; CHECK-NEXT: push {r4, lr}
; CHECK-NEXT: .pad #20
; CHECK-NEXT: sub sp, sp, #20
; CHECK-NEXT: add r0, sp, #28
; CHECK-NEXT: add lr, sp, #40
; CHECK-NEXT: stm r0, {r1, r2, r3}
; CHECK-NEXT: add r0, sp, #28
; CHECK-NEXT: mov r1, sp
; CHECK-NEXT: ldr r2, [r0], #4
; CHECK-NEXT: add r12, r1, #16
; CHECK-NEXT: str r2, [r1], #4
; CHECK-NEXT: ldr r2, [r0], #4
; CHECK-NEXT: str r2, [r1], #4
; CHECK-NEXT: ldr r2, [r0], #4
; CHECK-NEXT: str r2, [r1], #4
; CHECK-NEXT: ldr r2, [r0], #4
; CHECK-NEXT: str r2, [r1], #4
; CHECK-NEXT: ldr r2, [r0], #4
; CHECK-NEXT: str r2, [r1], #4
; CHECK-NEXT: ldm sp, {r0, r1, r2, r3}
; CHECK-NEXT: ldr r4, [r12], #4
; CHECK-NEXT: str r4, [lr], #4
; CHECK-NEXT: add sp, sp, #20
; CHECK-NEXT: pop {r4, lr}
; CHECK-NEXT: add sp, sp, #12
; CHECK-NEXT: b shift_byval_callee
entry:
  ; %b is this function's second parameter but the callee's first; the i32 %a
  ; is dropped.
  tail call void @shift_byval_callee(ptr byval(%twenty_bytes) align 4 %b)
  ret void
}
; A global object passed to a byval argument, so it must be copied, but doesn't
; need a stack temporary.
@large_global = external global %twenty_bytes
define void @large_caller_from_global(ptr byval(%twenty_bytes) align 4 %a) {
; CHECK-LABEL: large_caller_from_global:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .pad #16
; CHECK-NEXT: sub sp, sp, #16
; CHECK-NEXT: .save {r4, lr}
; CHECK-NEXT: push {r4, lr}
; CHECK-NEXT: add r12, sp, #8
; CHECK-NEXT: add lr, sp, #24
; CHECK-NEXT: stm r12, {r0, r1, r2, r3}
; CHECK-NEXT: movw r3, :lower16:large_global
; CHECK-NEXT: movt r3, :upper16:large_global
; CHECK-NEXT: add r12, r3, #16
; CHECK-NEXT: ldm r3, {r0, r1, r2, r3}
; CHECK-NEXT: ldr r4, [r12], #4
; CHECK-NEXT: str r4, [lr], #4
; CHECK-NEXT: pop {r4, lr}
; CHECK-NEXT: add sp, sp, #16
; CHECK-NEXT: b large_callee
entry:
  ; The incoming %a is unused; the callee receives @large_global by value.
  musttail call void @large_callee(ptr byval(%twenty_bytes) align 4 @large_global)
  ret void
}
|