File: sme-streaming-mode-changing-call-disable-stackslot-scavenging.ll

package info (click to toggle)
llvm-toolchain-20 1%3A20.1.6-1~exp1
  • links: PTS, VCS
  • area: main
  • in suites: experimental
  • size: 2,111,304 kB
  • sloc: cpp: 7,438,677; ansic: 1,393,822; asm: 1,012,926; python: 241,650; f90: 86,635; objc: 75,479; lisp: 42,144; pascal: 17,286; sh: 10,027; ml: 5,082; perl: 4,730; awk: 3,523; makefile: 3,349; javascript: 2,251; xml: 892; fortran: 672
file content (95 lines) | stat: -rw-r--r-- 4,245 bytes parent folder | download | duplicates (6)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
; RUN: llc -aarch64-streaming-hazard-size=0 < %s | FileCheck %s

target triple = "aarch64"

; This function would normally scavenge a stack slot from the callee-save
; area, which would lead to spilling 's0' to that stack slot before the
; smstop and filling it with 'addvl + <offset>' after the smstop, because
; the frame pointer is not available.
; That would not be valid: the vector length has changed, so 'addvl' cannot
; be used. This test checks that stack-slot scavenging is disabled when
; there are any streaming-mode-changing call sequences in the function.
define void @test_no_stackslot_scavenging(float %f) #0 {
; CHECK-LABEL: test_no_stackslot_scavenging:
; CHECK:       // %bb.0:
; CHECK-NEXT:    stp d15, d14, [sp, #-96]! // 16-byte Folded Spill
; CHECK-NEXT:    cntd x9
; CHECK-NEXT:    stp d13, d12, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT:    stp d11, d10, [sp, #32] // 16-byte Folded Spill
; CHECK-NEXT:    stp d9, d8, [sp, #48] // 16-byte Folded Spill
; CHECK-NEXT:    stp x29, x30, [sp, #64] // 16-byte Folded Spill
; CHECK-NEXT:    stp x9, x24, [sp, #80] // 16-byte Folded Spill
; CHECK-NEXT:    sub sp, sp, #16
; CHECK-NEXT:    addvl sp, sp, #-1
; CHECK-NEXT:    str s0, [sp, #12] // 4-byte Folded Spill
; CHECK-NEXT:    //APP
; CHECK-NEXT:    //NO_APP
; CHECK-NEXT:    smstop sm
; CHECK-NEXT:    ldr s0, [sp, #12] // 4-byte Folded Reload
; CHECK-NEXT:    bl use_f
; CHECK-NEXT:    smstart sm
; CHECK-NEXT:    addvl sp, sp, #1
; CHECK-NEXT:    add sp, sp, #16
; CHECK-NEXT:    ldp x29, x30, [sp, #64] // 16-byte Folded Reload
; CHECK-NEXT:    ldr x24, [sp, #88] // 8-byte Folded Reload
; CHECK-NEXT:    ldp d9, d8, [sp, #48] // 16-byte Folded Reload
; CHECK-NEXT:    ldp d11, d10, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT:    ldp d13, d12, [sp, #16] // 16-byte Folded Reload
; CHECK-NEXT:    ldp d15, d14, [sp], #96 // 16-byte Folded Reload
; CHECK-NEXT:    ret
  ; Scalable-vector stack object. The expected prologue above reserves room
  ; for it with 'addvl sp, sp, #-1' and the epilogue releases it with
  ; 'addvl sp, sp, #1'.
  %ptr = alloca <vscale x 16 x i8>
  ; Empty inline asm whose only effect is to clobber x24; x24 accordingly
  ; appears in the callee-save spill and reload in the expected output.
  call void asm sideeffect "", "~{x24}"() nounwind
  ; @use_f is declared without the streaming attribute that #0 gives this
  ; function, so the expected output brackets the call with 'smstop sm' and
  ; 'smstart sm'. %f (s0) is spilled before the smstop and reloaded after it
  ; from the same fixed sp-relative address '[sp, #12]'; no 'addvl' is used
  ; to form the reload address.
  call void @use_f(float %f)
  ret void
}

; Same scenario as @test_no_stackslot_scavenging, but with a frame pointer
; requested ("frame-pointer"="all") and an additional variable-sized alloca.
; In the expected output the spill of s0 before 'smstop sm' and its reload
; after it both use the fixed x29-relative address '[x29, #28]'; no 'addvl'
; is used to form the reload address.
define void @test_no_stackslot_scavenging_with_fp(float %f, i64 %n) #0 "frame-pointer"="all" {
; CHECK-LABEL: test_no_stackslot_scavenging_with_fp:
; CHECK:       // %bb.0:
; CHECK-NEXT:    stp d15, d14, [sp, #-128]! // 16-byte Folded Spill
; CHECK-NEXT:    cntd x9
; CHECK-NEXT:    stp d13, d12, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT:    stp d11, d10, [sp, #32] // 16-byte Folded Spill
; CHECK-NEXT:    stp d9, d8, [sp, #48] // 16-byte Folded Spill
; CHECK-NEXT:    stp x29, x30, [sp, #64] // 16-byte Folded Spill
; CHECK-NEXT:    add x29, sp, #64
; CHECK-NEXT:    str x9, [sp, #80] // 8-byte Folded Spill
; CHECK-NEXT:    stp x28, x25, [sp, #96] // 16-byte Folded Spill
; CHECK-NEXT:    stp x24, x19, [sp, #112] // 16-byte Folded Spill
; CHECK-NEXT:    addvl sp, sp, #-1
; CHECK-NEXT:    lsl x9, x0, #3
; CHECK-NEXT:    mov x8, sp
; CHECK-NEXT:    mov x19, sp
; CHECK-NEXT:    str s0, [x29, #28] // 4-byte Folded Spill
; CHECK-NEXT:    add x9, x9, #15
; CHECK-NEXT:    and x9, x9, #0xfffffffffffffff0
; CHECK-NEXT:    sub x8, x8, x9
; CHECK-NEXT:    mov sp, x8
; CHECK-NEXT:    //APP
; CHECK-NEXT:    //NO_APP
; CHECK-NEXT:    smstop sm
; CHECK-NEXT:    ldr s0, [x29, #28] // 4-byte Folded Reload
; CHECK-NEXT:    bl use_f
; CHECK-NEXT:    smstart sm
; CHECK-NEXT:    sub sp, x29, #64
; CHECK-NEXT:    ldp x24, x19, [sp, #112] // 16-byte Folded Reload
; CHECK-NEXT:    ldp x28, x25, [sp, #96] // 16-byte Folded Reload
; CHECK-NEXT:    ldp x29, x30, [sp, #64] // 16-byte Folded Reload
; CHECK-NEXT:    ldp d9, d8, [sp, #48] // 16-byte Folded Reload
; CHECK-NEXT:    ldp d11, d10, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT:    ldp d13, d12, [sp, #16] // 16-byte Folded Reload
; CHECK-NEXT:    ldp d15, d14, [sp], #128 // 16-byte Folded Reload
; CHECK-NEXT:    ret
  ; Variable-sized alloca of %n i64 elements. The expected output computes
  ; its size at run time (x0 << 3, rounded up to a multiple of 16) and
  ; subtracts it from sp.
  %ptr2 = alloca i64, i64 %n, align 8
  ; Scalable-vector stack object; the expected prologue reserves room for it
  ; with 'addvl sp, sp, #-1'.
  %ptr = alloca <vscale x 16 x i8>
  ; Empty inline asm whose only effect is to clobber x24 and x25; both
  ; registers appear in the callee-save spills and reloads in the expected
  ; output.
  call void asm sideeffect "", "~{x24},~{x25}"() nounwind
  ; @use_f is declared without the streaming attribute that #0 gives this
  ; function, so the expected output brackets the call with 'smstop sm' and
  ; 'smstart sm'.
  call void @use_f(float %f)
  ret void
}

; Callee used by both tests above. It is declared with no attributes, unlike
; the callers, which carry attribute group #0.
declare void @use_f(float)
; NOTE(review): not referenced by any function visible in this file; possibly
; a leftover declaration. Confirm before removing.
declare void @use_f_and_ptr(float, ptr)

; Attribute group shared by both test functions: nounwind, the SVE and SME
; target features, and "aarch64_pstate_sm_enabled" (presumably marks the
; function as streaming; see LLVM's AArch64 SME documentation).
attributes #0 = { nounwind "target-features"="+sve,+sme" "aarch64_pstate_sm_enabled" }