File: sve-streaming-mode-fixed-length-ld2-alloca.ll

package info (click to toggle)
llvm-toolchain-18 1%3A18.1.8-18
  • links: PTS, VCS
  • area: main
  • in suites: forky, trixie
  • size: 1,908,340 kB
  • sloc: cpp: 6,667,937; ansic: 1,440,452; asm: 883,619; python: 230,549; objc: 76,880; f90: 74,238; lisp: 35,989; pascal: 16,571; sh: 10,229; perl: 7,459; ml: 5,047; awk: 3,523; makefile: 2,987; javascript: 2,149; xml: 892; fortran: 649; cs: 573
file content (135 lines) | stat: -rw-r--r-- 4,871 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mattr=+sve -force-streaming-compatible-sve  < %s | FileCheck %s
;
; The lit command above compiles this file with llc, with SVE enabled and the
; -force-streaming-compatible-sve option set, and verifies the emitted assembly
; against the autogenerated assertions embedded in each function below.
; Every function follows the same shape: take a stack buffer, hand it to @def,
; reload it as a fixed-length vector, pick out every second lane with a
; shufflevector, and store the narrower result through %st_ptr.

target triple = "aarch64-unknown-linux-gnu"

; External function, defined outside this file; each test passes it a pointer
; to its stack buffer before loading from that buffer.
; NOTE(review): presumably present so the buffer contents are opaque to the
; optimizer - confirm.
declare void @def(ptr)

; Deinterleaving load from a 4-byte stack buffer: lanes 0 and 2 of a <4 x i8>
; load are stored to %st_ptr as a <2 x i8>.
; The assertions below expect the load to be emitted as a predicated ld2b
; (p0.b, vl2) reading the buffer through x20, with the stored lanes taken from
; z0 and written out by a st1b under a p0.s, vl2 predicate.
define void @alloc_v4i8(ptr %st_ptr) nounwind {
; CHECK-LABEL: alloc_v4i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sub sp, sp, #48
; CHECK-NEXT:    stp x20, x19, [sp, #32] // 16-byte Folded Spill
; CHECK-NEXT:    mov x19, x0
; CHECK-NEXT:    add x0, sp, #28
; CHECK-NEXT:    str x30, [sp, #16] // 8-byte Folded Spill
; CHECK-NEXT:    add x20, sp, #28
; CHECK-NEXT:    bl def
; CHECK-NEXT:    ptrue p0.b, vl2
; CHECK-NEXT:    ld2b { z0.b, z1.b }, p0/z, [x20]
; CHECK-NEXT:    ptrue p0.s, vl2
; CHECK-NEXT:    ldr x30, [sp, #16] // 8-byte Folded Reload
; CHECK-NEXT:    mov z2.b, z0.b[1]
; CHECK-NEXT:    fmov w8, s0
; CHECK-NEXT:    fmov w9, s2
; CHECK-NEXT:    stp w8, w9, [sp, #8]
; CHECK-NEXT:    ldr d0, [sp, #8]
; CHECK-NEXT:    st1b { z0.s }, p0, [x19]
; CHECK-NEXT:    ldp x20, x19, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT:    add sp, sp, #48
; CHECK-NEXT:    ret
  ; 4-byte stack buffer, passed to the external @def before being read back.
  %alloc = alloca [4 x i8]
  call void @def(ptr %alloc)
  ; Read the whole buffer as one vector, then keep the even-indexed lanes.
  %load = load <4 x i8>, ptr %alloc
  %strided.vec = shufflevector <4 x i8> %load, <4 x i8> poison, <2 x i32> <i32 0, i32 2>
  store <2 x i8> %strided.vec, ptr %st_ptr
  ret void
}

; Deinterleaving load from a 6-byte stack buffer with a non-power-of-two
; result: lanes 1, 3 and 5 of a <6 x i8> load are stored to %st_ptr as a
; <3 x i8>.
; The assertions below expect a predicated ld2b (p0.b, vl3) with the selected
; lanes extracted from z1 (the second register of the pair), and the 3-byte
; result written to x19 as a halfword store at offset 0 plus a byte store at
; offset 2.
define void @alloc_v6i8(ptr %st_ptr) nounwind {
; CHECK-LABEL: alloc_v6i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sub sp, sp, #48
; CHECK-NEXT:    stp x20, x19, [sp, #32] // 16-byte Folded Spill
; CHECK-NEXT:    mov x19, x0
; CHECK-NEXT:    add x0, sp, #24
; CHECK-NEXT:    str x30, [sp, #16] // 8-byte Folded Spill
; CHECK-NEXT:    add x20, sp, #24
; CHECK-NEXT:    bl def
; CHECK-NEXT:    ptrue p0.b, vl3
; CHECK-NEXT:    ld2b { z0.b, z1.b }, p0/z, [x20]
; CHECK-NEXT:    ptrue p0.h, vl4
; CHECK-NEXT:    mov z2.b, z1.b[3]
; CHECK-NEXT:    fmov w8, s1
; CHECK-NEXT:    mov z3.b, z1.b[2]
; CHECK-NEXT:    mov z4.b, z1.b[1]
; CHECK-NEXT:    strh w8, [sp]
; CHECK-NEXT:    fmov w8, s2
; CHECK-NEXT:    fmov w9, s3
; CHECK-NEXT:    strh w8, [sp, #6]
; CHECK-NEXT:    fmov w8, s4
; CHECK-NEXT:    strh w9, [sp, #4]
; CHECK-NEXT:    strh w8, [sp, #2]
; CHECK-NEXT:    add x8, sp, #12
; CHECK-NEXT:    ldr d0, [sp]
; CHECK-NEXT:    st1b { z0.h }, p0, [x8]
; CHECK-NEXT:    ldrh w8, [sp, #12]
; CHECK-NEXT:    strb w9, [x19, #2]
; CHECK-NEXT:    ldr x30, [sp, #16] // 8-byte Folded Reload
; CHECK-NEXT:    strh w8, [x19]
; CHECK-NEXT:    ldp x20, x19, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT:    add sp, sp, #48
; CHECK-NEXT:    ret
  ; 6-byte stack buffer, passed to the external @def before being read back.
  %alloc = alloca [6 x i8]
  call void @def(ptr %alloc)
  ; Read the whole buffer as one vector, then keep the odd-indexed lanes.
  %load = load <6 x i8>, ptr %alloc
  %strided.vec = shufflevector <6 x i8> %load, <6 x i8> poison, <3 x i32> <i32 1, i32 3, i32 5>
  store <3 x i8> %strided.vec, ptr %st_ptr
  ret void
}

; Strided extraction that covers only part of the source: the nine even lanes
; 0..16 of a <32 x i8> load are stored to %st_ptr as a <9 x i8>.
; Unlike the other functions in this file, the assertions below contain no
; ld2b. The expected code loads the buffer's two 16-byte halves with ldr q,
; permutes the first half with tbl using a mask loaded from .LCPI2_0, and
; writes the result to x19 as an 8-byte str followed by a strb of the ninth
; byte at offset 8.
define void @alloc_v32i8(ptr %st_ptr) nounwind {
; CHECK-LABEL: alloc_v32i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sub sp, sp, #48
; CHECK-NEXT:    stp x30, x19, [sp, #32] // 16-byte Folded Spill
; CHECK-NEXT:    mov x19, x0
; CHECK-NEXT:    mov x0, sp
; CHECK-NEXT:    bl def
; CHECK-NEXT:    adrp x8, .LCPI2_0
; CHECK-NEXT:    ldr q0, [sp]
; CHECK-NEXT:    ldr q1, [x8, :lo12:.LCPI2_0]
; CHECK-NEXT:    tbl z0.b, { z0.b }, z1.b
; CHECK-NEXT:    ldr q1, [sp, #16]
; CHECK-NEXT:    fmov w8, s1
; CHECK-NEXT:    strb w8, [x19, #8]
; CHECK-NEXT:    fmov x8, d0
; CHECK-NEXT:    str x8, [x19]
; CHECK-NEXT:    ldp x30, x19, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT:    add sp, sp, #48
; CHECK-NEXT:    ret
  ; 32-byte stack buffer, passed to the external @def before being read back.
  %alloc = alloca [32 x i8]
  call void @def(ptr %alloc)
  ; Read the whole buffer as one vector, then keep even lanes 0 through 16
  ; (nine lanes, so the result is not a full half of the source).
  %load = load <32 x i8>, ptr %alloc
  %strided.vec = shufflevector <32 x i8> %load, <32 x i8> poison, <9 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16>
  store <9 x i8> %strided.vec, ptr %st_ptr
  ret void
}


; Deinterleaving load of floating-point data from an 8-double stack buffer:
; lanes 0, 2, 4 and 6 of an <8 x double> load are stored to %st_ptr as a
; <4 x double>.
; The assertions below expect two predicated ld2d instructions (p0.d, vl2):
; one at the buffer base in x20 and one at [x20, x8, lsl #3] with x8 = 4.
; The first register of each pair (z0 and z2) is then stored to x19 with a
; single stp of q0 and q2.
define void @alloc_v8f64(ptr %st_ptr) nounwind {
; CHECK-LABEL: alloc_v8f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sub sp, sp, #96
; CHECK-NEXT:    stp x20, x19, [sp, #80] // 16-byte Folded Spill
; CHECK-NEXT:    mov x19, x0
; CHECK-NEXT:    mov x0, sp
; CHECK-NEXT:    str x30, [sp, #64] // 8-byte Folded Spill
; CHECK-NEXT:    mov x20, sp
; CHECK-NEXT:    bl def
; CHECK-NEXT:    ptrue p0.d, vl2
; CHECK-NEXT:    mov x8, #4 // =0x4
; CHECK-NEXT:    ld2d { z0.d, z1.d }, p0/z, [x20]
; CHECK-NEXT:    ld2d { z2.d, z3.d }, p0/z, [x20, x8, lsl #3]
; CHECK-NEXT:    ldr x30, [sp, #64] // 8-byte Folded Reload
; CHECK-NEXT:    stp q0, q2, [x19]
; CHECK-NEXT:    ldp x20, x19, [sp, #80] // 16-byte Folded Reload
; CHECK-NEXT:    add sp, sp, #96
; CHECK-NEXT:    ret
  ; Stack buffer of eight doubles, passed to the external @def before being
  ; read back.
  %alloc = alloca [8 x double]
  call void @def(ptr %alloc)
  ; Read the whole buffer as one vector, then keep the even-indexed lanes.
  %load = load <8 x double>, ptr %alloc
  %strided.vec = shufflevector <8 x double> %load, <8 x double> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
  store <4 x double> %strided.vec, ptr %st_ptr
  ret void
}