1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250
|
; RUN: llc < %s -mtriple=aarch64-apple-ios | FileCheck %s
; RUN: llc < %s -mtriple=aarch64-apple-ios -enable-shrink-wrap=true | FileCheck %s
; Shrink wrapping currently does not kick in because we have a TLS CALL
; in the entry block and it will clobber the link register.
; RUN: llc < %s -mtriple=aarch64-apple-ios -O0 -fast-isel | FileCheck --check-prefix=CHECK-O0 %s
%struct.S = type { i8 }
@sg = internal thread_local global %struct.S zeroinitializer, align 1
@__dso_handle = external global i8
@__tls_guard = internal thread_local unnamed_addr global i1 false
@sum1 = internal thread_local global i32 0, align 4
declare %struct.S* @_ZN1SC1Ev(%struct.S* returned)
declare %struct.S* @_ZN1SD1Ev(%struct.S* returned)
declare i32 @_tlv_atexit(void (i8*)*, i8*, i8*)
define cxx_fast_tlscc nonnull %struct.S* @_ZTW2sg() nounwind {
%.b.i = load i1, i1* @__tls_guard, align 1
br i1 %.b.i, label %__tls_init.exit, label %init.i
init.i:
store i1 true, i1* @__tls_guard, align 1
%call.i.i = tail call %struct.S* @_ZN1SC1Ev(%struct.S* nonnull @sg)
%1 = tail call i32 @_tlv_atexit(void (i8*)* nonnull bitcast (%struct.S* (%struct.S*)* @_ZN1SD1Ev to void (i8*)*), i8* nonnull getelementptr inbounds (%struct.S, %struct.S* @sg, i64 0, i32 0), i8* nonnull @__dso_handle)
br label %__tls_init.exit
__tls_init.exit:
ret %struct.S* @sg
}
; CHECK-LABEL: _ZTW2sg
; CHECK-NOT: stp d31, d30
; CHECK-NOT: stp d29, d28
; CHECK-NOT: stp d27, d26
; CHECK-NOT: stp d25, d24
; CHECK-NOT: stp d23, d22
; CHECK-NOT: stp d21, d20
; CHECK-NOT: stp d19, d18
; CHECK-NOT: stp d17, d16
; CHECK-NOT: stp d7, d6
; CHECK-NOT: stp d5, d4
; CHECK-NOT: stp d3, d2
; CHECK-NOT: stp d1, d0
; CHECK-NOT: stp x20, x19
; FIXME: The splitting logic in the register allocator fails to split along
; control flow here, we used to get this right by accident before...
; CHECK-NOTXX: stp x14, x13
; CHECK-NOT: stp x12, x11
; CHECK-NOT: stp x10, x9
; CHECK-NOT: stp x8, x7
; CHECK-NOT: stp x6, x5
; CHECK-NOT: stp x4, x3
; CHECK-NOT: stp x2, x1
; CHECK: blr
; CHECK: tbnz w{{.*}}, #0, [[BB_end:.?LBB0_[0-9]+]]
; CHECK: blr
; CHECK: tlv_atexit
; CHECK: [[BB_end]]:
; CHECK: blr
; CHECK-NOT: ldp x2, x1
; CHECK-NOT: ldp x4, x3
; CHECK-NOT: ldp x6, x5
; CHECK-NOT: ldp x8, x7
; CHECK-NOT: ldp x10, x9
; CHECK-NOT: ldp x12, x11
; CHECK-NOTXX: ldp x14, x13
; CHECK-NOT: ldp x20, x19
; CHECK-NOT: ldp d1, d0
; CHECK-NOT: ldp d3, d2
; CHECK-NOT: ldp d5, d4
; CHECK-NOT: ldp d7, d6
; CHECK-NOT: ldp d17, d16
; CHECK-NOT: ldp d19, d18
; CHECK-NOT: ldp d21, d20
; CHECK-NOT: ldp d23, d22
; CHECK-NOT: ldp d25, d24
; CHECK-NOT: ldp d27, d26
; CHECK-NOT: ldp d29, d28
; CHECK-NOT: ldp d31, d30
; CHECK-O0-LABEL: _ZTW2sg
; CHECK-O0: stp d31, d30
; CHECK-O0: stp d29, d28
; CHECK-O0: stp d27, d26
; CHECK-O0: stp d25, d24
; CHECK-O0: stp d23, d22
; CHECK-O0: stp d21, d20
; CHECK-O0: stp d19, d18
; CHECK-O0: stp d17, d16
; CHECK-O0: stp d7, d6
; CHECK-O0: stp d5, d4
; CHECK-O0: stp d3, d2
; CHECK-O0: stp d1, d0
; CHECK-O0: str x14
; CHECK-O0: stp x13, x12
; CHECK-O0: stp x11, x10
; CHECK-O0: stp x8, x7
; CHECK-O0: stp x6, x5
; CHECK-O0: stp x4, x3
; CHECK-O0: stp x2, x1
; CHECK-O0: blr
; CHECK-O0: tbnz w{{.*}}, #0, [[BB_end:.?LBB0_[0-9]+]]
; CHECK-O0: blr
; CHECK-O0: tlv_atexit
; CHECK-O0: [[BB_end]]:
; CHECK-O0: blr
; CHECK-O0: ldp x2, x1
; CHECK-O0: ldp x4, x3
; CHECK-O0: ldp x6, x5
; CHECK-O0: ldp x8, x7
; CHECK-O0: ldp x11, x10
; CHECK-O0: ldp x13, x12
; CHECK-O0: ldr x14
; CHECK-O0: ldp d1, d0
; CHECK-O0: ldp d3, d2
; CHECK-O0: ldp d5, d4
; CHECK-O0: ldp d7, d6
; CHECK-O0: ldp d17, d16
; CHECK-O0: ldp d19, d18
; CHECK-O0: ldp d21, d20
; CHECK-O0: ldp d23, d22
; CHECK-O0: ldp d25, d24
; CHECK-O0: ldp d27, d26
; CHECK-O0: ldp d29, d28
; CHECK-O0: ldp d31, d30
; CHECK-LABEL: _ZTW4sum1
; CHECK-NOT: stp d31, d30
; CHECK-NOT: stp d29, d28
; CHECK-NOT: stp d27, d26
; CHECK-NOT: stp d25, d24
; CHECK-NOT: stp d23, d22
; CHECK-NOT: stp d21, d20
; CHECK-NOT: stp d19, d18
; CHECK-NOT: stp d17, d16
; CHECK-NOT: stp d7, d6
; CHECK-NOT: stp d5, d4
; CHECK-NOT: stp d3, d2
; CHECK-NOT: stp d1, d0
; CHECK-NOT: stp x20, x19
; CHECK-NOT: stp x14, x13
; CHECK-NOT: stp x12, x11
; CHECK-NOT: stp x10, x9
; CHECK-NOT: stp x8, x7
; CHECK-NOT: stp x6, x5
; CHECK-NOT: stp x4, x3
; CHECK-NOT: stp x2, x1
; CHECK: blr
; CHECK-O0-LABEL: _ZTW4sum1
; CHECK-O0-NOT: vstr
; CHECK-O0-NOT: vldr
define cxx_fast_tlscc nonnull i32* @_ZTW4sum1() nounwind {
ret i32* @sum1
}
; Make sure at O0, we don't generate spilling/reloading of the CSRs.
; CHECK-O0-LABEL: tls_test2
; CHECK-O0-NOT: stp d31, d30
; CHECK-O0-NOT: stp d29, d28
; CHECK-O0-NOT: stp d27, d26
; CHECK-O0-NOT: stp d25, d24
; CHECK-O0-NOT: stp d23, d22
; CHECK-O0-NOT: stp d21, d20
; CHECK-O0-NOT: stp d19, d18
; CHECK-O0-NOT: stp d17, d16
; CHECK-O0-NOT: stp d7, d6
; CHECK-O0-NOT: stp d5, d4
; CHECK-O0-NOT: stp d3, d2
; CHECK-O0-NOT: stp d1, d0
; CHECK-O0-NOT: stp x20, x19
; CHECK-O0-NOT: stp x14, x13
; CHECK-O0-NOT: stp x12, x11
; CHECK-O0-NOT: stp x10, x9
; CHECK-O0-NOT: stp x8, x7
; CHECK-O0-NOT: stp x6, x5
; CHECK-O0-NOT: stp x4, x3
; CHECK-O0-NOT: stp x2, x1
; CHECK-O0: bl {{.*}}tls_helper
; CHECK-O0-NOT: ldp x2, x1
; CHECK-O0-NOT: ldp x4, x3
; CHECK-O0-NOT: ldp x6, x5
; CHECK-O0-NOT: ldp x8, x7
; CHECK-O0-NOT: ldp x10, x9
; CHECK-O0-NOT: ldp x12, x11
; CHECK-O0-NOT: ldp x14, x13
; CHECK-O0-NOT: ldp x20, x19
; CHECK-O0-NOT: ldp d1, d0
; CHECK-O0-NOT: ldp d3, d2
; CHECK-O0-NOT: ldp d5, d4
; CHECK-O0-NOT: ldp d7, d6
; CHECK-O0-NOT: ldp d17, d16
; CHECK-O0-NOT: ldp d19, d18
; CHECK-O0-NOT: ldp d21, d20
; CHECK-O0-NOT: ldp d23, d22
; CHECK-O0-NOT: ldp d25, d24
; CHECK-O0-NOT: ldp d27, d26
; CHECK-O0-NOT: ldp d29, d28
; CHECK-O0-NOT: ldp d31, d30
; CHECK-O0: ret
%class.C = type { i32 }
@tC = internal thread_local global %class.C zeroinitializer, align 4
declare cxx_fast_tlscc void @tls_helper()
define cxx_fast_tlscc %class.C* @tls_test2() #1 {
call cxx_fast_tlscc void @tls_helper()
ret %class.C* @tC
}
; Make sure we do not allow tail call when caller and callee have different
; calling conventions.
declare %class.C* @_ZN1CD1Ev(%class.C* readnone returned %this)
; CHECK-LABEL: tls_test
; CHECK: bl __tlv_atexit
define cxx_fast_tlscc void @__tls_test() {
entry:
store i32 0, i32* getelementptr inbounds (%class.C, %class.C* @tC, i64 0, i32 0), align 4
%0 = tail call i32 @_tlv_atexit(void (i8*)* bitcast (%class.C* (%class.C*)* @_ZN1CD1Ev to void (i8*)*), i8* bitcast (%class.C* @tC to i8*), i8* nonnull @__dso_handle) #1
ret void
}
define cxx_fast_tlscc void @weird_prologue_regs(i32 %n) #1 {
; CHECK-LABEL: weird_prologue_regs:
; CHECK-NOT: str x9
; CHECK-NOT: stp{{.*}}x9{{.*}}[
; CHECK-NOT: str x19
; CHECK-NOT: stp{{.*}}x19{{.*}}[
; CHECK: sub x9, sp, #
; CHECK: and sp, x9, #0x
; CHECK: mov x19, sp
; CHECK-NOT: str x9
; CHECK-NOT: stp{{.*}}x9{{.*}}[
; CHECK-NOT: str x19
; CHECK-NOT: stp{{.*}}x19{{.*}}[
%p0 = alloca i32, i32 200
%p1 = alloca i32, align 32
%p2 = alloca i32, i32 %n
call void @callee(i32* %p0)
call void @callee(i32* %p1)
call void @callee(i32* %p2)
ret void
}
declare void @callee(i32*)
attributes #0 = { nounwind "frame-pointer"="all" }
attributes #1 = { nounwind }
|