1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226
|
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu | FileCheck %s -check-prefix=NORMAL -check-prefix=NORMALFP
; RUN: llc < %s -mtriple=x86_64-windows | FileCheck %s -check-prefix=NOPUSH
; RUN: llc < %s -mtriple=x86_64-apple-darwin | FileCheck %s -check-prefix=NOPUSH -check-prefix=NORMALFP
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -no-x86-call-frame-opt | FileCheck %s -check-prefix=NOPUSH
; External callees used by the test functions in this file. Only their
; signatures matter: each test inspects how a call site materializes the
; arguments, not what the callee does.
; Seven alternating i32/i64 parameters (called by test4).
declare void @seven_params(i32 %a, i64 %b, i32 %c, i64 %d, i32 %e, i64 %f, i32 %g)
; Eight i32 parameters (called by pr34863_32).
declare void @eightparams(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i32 %g, i32 %h)
; Eight i16 parameters (called by pr34863_16).
declare void @eightparams16(i16 %a, i16 %b, i16 %c, i16 %d, i16 %e, i16 %f, i16 %g, i16 %h)
; Eight i64 parameters (called by pr34863_64).
declare void @eightparams64(i64 %a, i64 %b, i64 %c, i64 %d, i64 %e, i64 %f, i64 %g, i64 %h)
; Ten alternating i32/i64 parameters; the main callee for the push tests.
declare void @ten_params(i32 %a, i64 %b, i32 %c, i64 %d, i32 %e, i64 %f, i32 %g, i64 %h, i32 %i, i64 %j)
; Same shape as @ten_params, except the eighth parameter is a pointer
; (called by test5 with the address of a global).
declare void @ten_params_ptr(i32 %a, i64 %b, i32 %c, i64 %d, i32 %e, i64 %f, i32 %g, ptr %h, i32 %i, i64 %j)
; Nine float parameters. test9 and test10 use this as the call whose stack
; argument is passed without pushes.
declare void @cannot_push(float %a, float %b, float %c, float %d, float %e, float %f, float %g, float %h, float %i)
; We should get pushes for the last 4 parameters. Test that the
; in-register parameters are all in the right places, and check
; that the stack manipulations are correct and correctly
; described by the DWARF directives. Test that the switch
; to disable the optimization works and that the optimization
; doesn't kick in on Windows64 where it is not allowed.
; NORMAL-LABEL: test1
; NORMAL: pushq
; NORMAL-DAG: movl $1, %edi
; NORMAL-DAG: movl $2, %esi
; NORMAL-DAG: movl $3, %edx
; NORMAL-DAG: movl $4, %ecx
; NORMAL-DAG: movl $5, %r8d
; NORMAL-DAG: movl $6, %r9d
; NORMAL: pushq $10
; NORMAL: .cfi_adjust_cfa_offset 8
; NORMAL: pushq $9
; NORMAL: .cfi_adjust_cfa_offset 8
; NORMAL: pushq $8
; NORMAL: .cfi_adjust_cfa_offset 8
; NORMAL: pushq $7
; NORMAL: .cfi_adjust_cfa_offset 8
; NORMAL: callq ten_params
; NORMAL: addq $32, %rsp
; NORMAL: .cfi_adjust_cfa_offset -32
; NORMAL: popq
; NORMAL: retq
; NOPUSH-LABEL: test1
; NOPUSH-NOT: pushq
; NOPUSH: retq
; test1: call @ten_params with the constants 1 through 10. The expectations
; preceding this function look for arguments 1-6 in registers and for
; arguments 10, 9, 8, 7 pushed in that order, each push followed by a CFA
; adjustment, with a single 32-byte stack cleanup after the call.
define void @test1() {
entry:
call void @ten_params(i32 1, i64 2, i32 3, i64 4, i32 5, i64 6, i32 7, i64 8, i32 9, i64 10)
ret void
}
; The presence of a frame pointer should not prevent pushes. But we
; don't need the CFI directives in that case.
; Also check that we generate the right pushes for >8bit immediates.
; NORMALFP-LABEL: test2
; NORMALFP: pushq $10000
; NORMALFP-NEXT: pushq $9000
; NORMALFP-NEXT: pushq $8000
; NORMALFP-NEXT: pushq $7000
; NORMALFP-NEXT: callq {{_?}}ten_params
; test2: same call shape as test1, but with stack-argument constants that do
; not fit in 8 bits (7000..10000).
define void @test2(i32 %k) {
entry:
; Variable-sized alloca whose result is never used. Presumably this is what
; forces a frame pointer for the scenario described above - confirm against
; the generated code.
%a = alloca i32, i32 %k
call void @ten_params(i32 1, i64 2, i32 3, i64 4, i32 5, i64 6, i32 7000, i64 8000, i32 9000, i64 10000)
ret void
}
; Parameters 7 & 8 should push a 64-bit register.
; TODO: Note that the regular expressions disallow r8 and r9. That's fine for
; now, because the pushes will always follow the moves into r8 and r9.
; Eventually, though, we want to be able to schedule the pushes better.
; In this example, it will save two copies, because we have to move the
; incoming parameters out of %rdi and %rsi to make room for the outgoing
; parameters.
; NORMAL-LABEL: test3
; NORMAL: pushq $10000
; NORMAL: pushq $9000
; NORMAL: pushq %r{{..}}
; NORMAL: pushq %r{{..}}
; NORMAL: callq ten_params
; test3: forward the incoming parameters %a and %b as outgoing arguments 7
; and 8 (both passed on the stack), with constants for arguments 9 and 10.
; The expectations above want the two forwarded values pushed from registers.
define void @test3(i32 %a, i64 %b) {
entry:
call void @ten_params(i32 1, i64 2, i32 3, i64 4, i32 5, i64 6, i32 %a, i64 %b, i32 9000, i64 10000)
ret void
}
; Check that we avoid the optimization for just one push.
; NORMAL-LABEL: test4
; NORMAL: movl $7, (%rsp)
; NORMAL: callq seven_params
; test4: call @seven_params with the constants 1 through 7. The expectation
; above is a plain store of the constant 7 to (%rsp) rather than a push.
define void @test4() {
entry:
call void @seven_params(i32 1, i64 2, i32 3, i64 4, i32 5, i64 6, i32 7)
ret void
}
; Check that pushing link-time constant addresses works correctly.
; NORMAL-LABEL: test5
; NORMAL: pushq $10
; NORMAL: pushq $9
; NORMAL: pushq $ext
; NORMAL: pushq $7
; NORMAL: callq ten_params_ptr
; External, dso_local constant whose address test5 passes as an argument.
@ext = external dso_local constant i8
; test5: pass the address of @ext as argument 8 of @ten_params_ptr. The
; expectations above want it pushed as an immediate symbol operand, like the
; integer constants around it.
define void @test5() {
entry:
call void @ten_params_ptr(i32 1, i64 2, i32 3, i64 4, i32 5, i64 6, i32 7, ptr @ext, i32 9, i64 10)
ret void
}
; Check that we fold 64-bit loads into a push with a memory operand, but push
; 32-bit loaded values from a register instead.
; NORMAL-LABEL: test6
; NORMAL: movq %rsi, [[REG64:%.+]]
; NORMAL: pushq $10
; NORMAL: pushq $9
; NORMAL: pushq ([[REG64]])
; NORMAL: pushq {{%r..}}
; NORMAL: callq ten_params
; test6: load one i32 and one i64 through the incoming pointers and pass the
; loaded values as arguments 7 and 8 of @ten_params.
define void @test6(ptr %p32, ptr %p64) {
entry:
; 32-bit load: the expectations above want this value pushed from a register.
%v32 = load i32, ptr %p32
; 64-bit load: the expectations above want this folded into the push as a
; memory operand.
%v64 = load i64, ptr %p64
call void @ten_params(i32 1, i64 2, i32 3, i64 4, i32 5, i64 6, i32 %v32, i64 %v64, i32 9, i64 10)
ret void
}
; Fold stack-relative loads into the push with correct offsets.
; Do the same for an indirect call whose address is loaded from the stack.
; On entry, %p7 is at 8(%rsp) and %p8 is at 16(%rsp). Prior to the call
; sequence, 72 bytes are allocated to the stack, 48 for register saves and
; 24 for local storage and alignment, so %p7 is at 80(%rsp) and %p8 is at
; 88(%rsp). The call address can be stored anywhere in the local space but
; happens to be stored at 8(%rsp). Each push bumps these offsets up by
; 8 bytes.
; NORMAL-LABEL: test7
; NORMAL: movq %r{{.*}}, 8(%rsp) {{.*Spill$}}
; NORMAL: pushq 88(%rsp)
; NORMAL: pushq $9
; NORMAL: pushq 96(%rsp)
; NORMAL: pushq $7
; NORMAL: callq *40(%rsp)
; test7: indirect call through a pointer that lives on the stack, forwarding
; the incoming stack parameters %p7 and %p8 as outgoing stack arguments 8
; and 10.
define void @test7(i64 %p1, i64 %p2, i64 %p3, i64 %p4, i64 %p5, i64 %p6, i64 %p7, i64 %p8) {
entry:
; Round-trip the callee address through a stack slot; the load is volatile.
%stack_fptr = alloca ptr
store ptr @ten_params, ptr %stack_fptr
%ten_params_ptr = load volatile ptr, ptr %stack_fptr
; Empty-effect asm that declares a clobber of every listed general-purpose
; register. Presumably this forces the loaded call address to be spilled
; (the expectations above look for a spill to 8(%rsp)) - confirm.
call void asm sideeffect "nop", "~{ax},~{bx},~{cx},~{dx},~{bp},~{si},~{di},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
; Indirect call with the @ten_params signature spelled out explicitly.
call void (i32, i64, i32, i64, i32, i64, i32, i64, i32, i64) %ten_params_ptr(i32 1, i64 2, i32 3, i64 4, i32 5, i64 6, i32 7, i64 %p7, i32 9, i64 %p8)
ret void
}
; We can't fold the load from the global into the push because of
; interference from the store to the same global.
; NORMAL-LABEL: test8
; NORMAL: movq the_global(%rip), [[REG:%r.+]]
; NORMAL: movq $42, the_global
; NORMAL: pushq $10
; NORMAL: pushq $9
; NORMAL: pushq [[REG]]
; NORMAL: pushq $7
; NORMAL: callq ten_params
; External, dso_local i64 global that test8 both reads and overwrites.
@the_global = external dso_local global i64
; test8: load @the_global, overwrite it with 42, then pass the previously
; loaded value as argument 8 of @ten_params.
define void @test8() {
%myload = load i64, ptr @the_global
; This store sits between the load and the call, so the value passed below
; is the one read before the store.
store i64 42, ptr @the_global
call void @ten_params(i32 1, i64 2, i32 3, i64 4, i32 5, i64 6, i32 7, i64 %myload, i32 9, i64 10)
ret void
}
; Converting one function call to use pushes negatively affects
; other calls that pass arguments on the stack without pushes.
; If the cost outweighs the benefit, avoid using pushes.
; NORMAL-LABEL: test9
; NORMAL: callq cannot_push
; NORMAL-NOT: push
; NORMAL: callq ten_params
; test9: one call to @ten_params placed between two calls to @cannot_push.
; The expectations above want no push between the first @cannot_push call
; and the @ten_params call.
define void @test9(float %p1) {
call void @cannot_push(float 1.0e0, float 2.0e0, float 3.0e0, float 4.0e0, float 5.0e0, float 6.0e0, float 7.0e0, float 8.0e0, float %p1)
call void @ten_params(i32 1, i64 2, i32 3, i64 4, i32 5, i64 6, i32 7, i64 8, i32 9, i64 10)
call void @cannot_push(float 1.0e0, float 2.0e0, float 3.0e0, float 4.0e0, float 5.0e0, float 6.0e0, float 7.0e0, float 8.0e0, float %p1)
ret void
}
; But if the benefit outweighs the cost, use pushes.
; NORMAL-LABEL: test10
; NORMAL: callq cannot_push
; NORMAL: pushq $10
; NORMAL: pushq $9
; NORMAL: pushq $8
; NORMAL: pushq $7
; NORMAL: callq ten_params
; test10: the mirror image of test9 - one call to @cannot_push placed between
; two calls to @ten_params. The expectations above want the stack arguments
; of the @ten_params call that follows @cannot_push to be pushed.
define void @test10(float %p1) {
call void @ten_params(i32 1, i64 2, i32 3, i64 4, i32 5, i64 6, i32 7, i64 8, i32 9, i64 10)
call void @cannot_push(float 1.0e0, float 2.0e0, float 3.0e0, float 4.0e0, float 5.0e0, float 6.0e0, float 7.0e0, float 8.0e0, float %p1)
call void @ten_params(i32 1, i64 2, i32 3, i64 4, i32 5, i64 6, i32 7, i64 8, i32 9, i64 10)
ret void
}
; NORMAL-LABEL: pr34863_16
; NORMAL: pushq ${{-1|65535}}
; NORMAL-NEXT: pushq $0
; NORMAL-NEXT: call
; pr34863_16: minsize function that passes %x six times followed by the i16
; constants 0 and -1. The expectations above want the -1 pushed first (spelled
; either -1 or 65535), then 0, immediately followed by the call.
; NOTE(review): presumably a regression test for bug 34863, judging by the
; name - confirm against the bug tracker.
define void @pr34863_16(i16 %x) minsize nounwind {
entry:
tail call void @eightparams16(i16 %x, i16 %x, i16 %x, i16 %x, i16 %x, i16 %x, i16 0, i16 -1)
ret void
}
; NORMAL-LABEL: pr34863_32
; NORMAL: pushq ${{-1|65535}}
; NORMAL-NEXT: pushq $0
; NORMAL-NEXT: call
; pr34863_32: i32 variant of the minsize test - passes %x six times followed
; by the constants 0 and -1. The expectations above want the -1 pushed first,
; then 0, immediately followed by the call.
; NOTE(review): presumably a regression test for bug 34863, judging by the
; name - confirm against the bug tracker.
define void @pr34863_32(i32 %x) minsize nounwind {
entry:
tail call void @eightparams(i32 %x, i32 %x, i32 %x, i32 %x, i32 %x, i32 %x, i32 0, i32 -1)
ret void
}
; NORMAL-LABEL: pr34863_64
; NORMAL: pushq ${{-1|65535}}
; NORMAL-NEXT: pushq $0
; NORMAL-NEXT: call
; pr34863_64: i64 variant of the minsize test - passes %x six times followed
; by the constants 0 and -1. The expectations above want the -1 pushed first,
; then 0, immediately followed by the call.
; NOTE(review): presumably a regression test for bug 34863, judging by the
; name - confirm against the bug tracker.
define void @pr34863_64(i64 %x) minsize nounwind {
entry:
tail call void @eightparams64(i64 %x, i64 %x, i64 %x, i64 %x, i64 %x, i64 %x, i64 0, i64 -1)
ret void
}
|