# File: load-store-opt-scc.mir
#
# Package info:
#   llvm-toolchain-17 1:17.0.6-22
#   links: PTS, VCS
#   area: main
#   in suites: forky, sid, trixie
#   size: 1,799,624 kB
#   sloc: cpp: 6,428,607; ansic: 1,383,196; asm: 793,408; python: 223,504; objc: 75,364; f90: 60,502; lisp: 33,869; pascal: 15,282; sh: 9,684; perl: 7,453; ml: 4,937; awk: 3,523; makefile: 2,889; javascript: 2,149; xml: 888; fortran: 619; cs: 573
# File content: 155 lines; stat: -rw-r--r--, 5,611 bytes
#
# RUN: llc -march=amdgcn -mcpu=gfx90a -run-pass=si-load-store-opt -verify-machineinstrs -o - %s | FileCheck %s

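# The purpose of this test is to make sure that adjacent buffer stores are
# combined only when their SCC bits agree. test1 (SCC bit clear on both stores)
# and test4 (SCC bit set on both stores) expect a single merged DWORDX2 store;
# test2 and test3 (SCC bit set on only one of the two stores) expect the two
# DWORD stores to be left separate.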

--- |
  ; LLVM IR module referenced by the machine functions in this file. The four
  ; kernels have identical bodies: each stores 123 to %out.gep.1 (element 1 of
  ; %out) and 456 to %out. The MIR memory operands refer to these values by
  ; name (%ir.out.gep.1), so the value names must stay unchanged.
  ; Kernel paired with the MIR function named test1.
  define amdgpu_kernel void @test1(ptr addrspace(1) %out) {
    %out.gep.1 = getelementptr i32, ptr addrspace(1) %out, i32 1
    store i32 123, ptr addrspace(1) %out.gep.1
    store i32 456, ptr addrspace(1) %out
    ret void
  }

  ; Kernel paired with the MIR function named test2.
  define amdgpu_kernel void @test2(ptr addrspace(1) %out) {
    %out.gep.1 = getelementptr i32, ptr addrspace(1) %out, i32 1
    store i32 123, ptr addrspace(1) %out.gep.1
    store i32 456, ptr addrspace(1) %out
    ret void
  }

  ; Kernel paired with the MIR function named test3.
  define amdgpu_kernel void @test3(ptr addrspace(1) %out) {
    %out.gep.1 = getelementptr i32, ptr addrspace(1) %out, i32 1
    store i32 123, ptr addrspace(1) %out.gep.1
    store i32 456, ptr addrspace(1) %out
    ret void
  }
  ; Kernel paired with the MIR function named test4.
  define amdgpu_kernel void @test4(ptr addrspace(1) %out) {
    %out.gep.1 = getelementptr i32, ptr addrspace(1) %out, i32 1
    store i32 123, ptr addrspace(1) %out.gep.1
    store i32 456, ptr addrspace(1) %out
    ret void
  }
...

# test1: both stores have 0 in the fifth operand (presumably the cache-policy
# immediate that carries the SCC bit), so the two DWORD stores at offsets 4 and
# 8 are expected to be merged into one DWORDX2 store at offset 4.
# CHECK-LABEL: name: test1
# CHECK: BUFFER_STORE_DWORDX2_OFFSET killed %{{[0-9]+}}, %{{[0-9]+}}, 0, 4, 0, 0, implicit $exec :: (store (s64) into %ir.out.gep.1, align 4, addrspace 1)
---
name: test1
liveins:
  - { reg: '$sgpr0_sgpr1', virtual-reg: '' }
body: |
  bb.0 (%ir-block.0):
    liveins: $sgpr0_sgpr1

    ; Values to be stored.
    $vgpr0 = V_MOV_B32_e32 123, implicit $exec
    $vgpr1 = V_MOV_B32_e32 456, implicit $exec

    ; Upper two dwords of the 128-bit register used as the stores' second operand.
    $sgpr2 = S_MOV_B32 -1
    $sgpr3 = S_MOV_B32 61440

    ; Load a 64-bit value through the kernel's live-in pointer pair (%0) and
    ; use it as the low half of %4; each REG_SEQUENCE subregister index
    ; matches the width of the value placed there.
    %0:sgpr_64 = COPY $sgpr0_sgpr1
    %1:sgpr_64 = S_LOAD_DWORDX2_IMM %0, 36, 0 :: (dereferenceable invariant load (s64) from `ptr addrspace(4) undef`, addrspace 4)
    %2:sgpr_32 = COPY $sgpr2
    %3:sgpr_32 = COPY $sgpr3
    %4:sgpr_128 = REG_SEQUENCE %1, %subreg.sub0_sub1, %2, %subreg.sub2, %3, %subreg.sub3

    %5:vgpr_32 = COPY $vgpr0
    %6:vgpr_32 = COPY $vgpr1

    ; Adjacent dword stores (offsets 4 and 8), fifth operand 0 on both.
    BUFFER_STORE_DWORD_OFFSET %5, %4, 0, 4, 0, 0, implicit $exec :: (store (s32) into %ir.out.gep.1, addrspace 1)
    BUFFER_STORE_DWORD_OFFSET %6, %4, 0, 8, 0, 0, implicit $exec :: (store (s32) into %ir.out.gep.1, addrspace 1)

    S_ENDPGM 0
...

# test2: only the first store has 16 in the fifth operand (presumably the
# cache-policy immediate that carries the SCC bit), so the two DWORD stores are
# expected to be left unmerged and adjacent.
# CHECK-LABEL: name: test2
# CHECK: BUFFER_STORE_DWORD_OFFSET %{{[0-9]+}}, %{{[0-9]+}}, 0, 4, 16, 0, implicit $exec :: (store (s32) into %ir.out.gep.1, addrspace 1)
# CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET %{{[0-9]+}}, %{{[0-9]+}}, 0, 8, 0, 0, implicit $exec :: (store (s32) into %ir.out.gep.1, addrspace 1)
---
name: test2
liveins:
  - { reg: '$sgpr0_sgpr1', virtual-reg: '' }
body: |
  bb.0 (%ir-block.0):
    liveins: $sgpr0_sgpr1

    ; Values to be stored.
    $vgpr0 = V_MOV_B32_e32 123, implicit $exec
    $vgpr1 = V_MOV_B32_e32 456, implicit $exec

    ; Upper two dwords of the 128-bit register used as the stores' second operand.
    $sgpr2 = S_MOV_B32 -1
    $sgpr3 = S_MOV_B32 61440

    ; Load a 64-bit value through the kernel's live-in pointer pair (%0) and
    ; use it as the low half of %4; each REG_SEQUENCE subregister index
    ; matches the width of the value placed there.
    %0:sgpr_64 = COPY $sgpr0_sgpr1
    %1:sgpr_64 = S_LOAD_DWORDX2_IMM %0, 36, 0 :: (dereferenceable invariant load (s64) from `ptr addrspace(4) undef`, addrspace 4)
    %2:sgpr_32 = COPY $sgpr2
    %3:sgpr_32 = COPY $sgpr3
    %4:sgpr_128 = REG_SEQUENCE %1, %subreg.sub0_sub1, %2, %subreg.sub2, %3, %subreg.sub3

    %5:vgpr_32 = COPY $vgpr0
    %6:vgpr_32 = COPY $vgpr1

    ; Adjacent dword stores (offsets 4 and 8); fifth operand is 16 on the
    ; first store and 0 on the second.
    BUFFER_STORE_DWORD_OFFSET %5, %4, 0, 4, 16, 0, implicit $exec :: (store (s32) into %ir.out.gep.1, addrspace 1)
    BUFFER_STORE_DWORD_OFFSET %6, %4, 0, 8, 0, 0, implicit $exec :: (store (s32) into %ir.out.gep.1, addrspace 1)

    S_ENDPGM 0
...

# test3: only the second store has 16 in the fifth operand (presumably the
# cache-policy immediate that carries the SCC bit), so the two DWORD stores are
# expected to be left unmerged and adjacent.
# CHECK-LABEL: name: test3
# CHECK: BUFFER_STORE_DWORD_OFFSET %{{[0-9]+}}, %{{[0-9]+}}, 0, 4, 0, 0, implicit $exec :: (store (s32) into %ir.out.gep.1, addrspace 1)
# CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET %{{[0-9]+}}, %{{[0-9]+}}, 0, 8, 16, 0, implicit $exec :: (store (s32) into %ir.out.gep.1, addrspace 1)
---
name: test3
liveins:
  - { reg: '$sgpr0_sgpr1', virtual-reg: '' }
body: |
  bb.0 (%ir-block.0):
    liveins: $sgpr0_sgpr1

    ; Values to be stored.
    $vgpr0 = V_MOV_B32_e32 123, implicit $exec
    $vgpr1 = V_MOV_B32_e32 456, implicit $exec

    ; Upper two dwords of the 128-bit register used as the stores' second operand.
    $sgpr2 = S_MOV_B32 -1
    $sgpr3 = S_MOV_B32 61440

    ; Load a 64-bit value through the kernel's live-in pointer pair (%0) and
    ; use it as the low half of %4; each REG_SEQUENCE subregister index
    ; matches the width of the value placed there.
    %0:sgpr_64 = COPY $sgpr0_sgpr1
    %1:sgpr_64 = S_LOAD_DWORDX2_IMM %0, 36, 0 :: (dereferenceable invariant load (s64) from `ptr addrspace(4) undef`, addrspace 4)
    %2:sgpr_32 = COPY $sgpr2
    %3:sgpr_32 = COPY $sgpr3
    %4:sgpr_128 = REG_SEQUENCE %1, %subreg.sub0_sub1, %2, %subreg.sub2, %3, %subreg.sub3

    %5:vgpr_32 = COPY $vgpr0
    %6:vgpr_32 = COPY $vgpr1

    ; Adjacent dword stores (offsets 4 and 8); fifth operand is 0 on the
    ; first store and 16 on the second.
    BUFFER_STORE_DWORD_OFFSET %5, %4, 0, 4, 0, 0, implicit $exec :: (store (s32) into %ir.out.gep.1, addrspace 1)
    BUFFER_STORE_DWORD_OFFSET %6, %4, 0, 8, 16, 0, implicit $exec :: (store (s32) into %ir.out.gep.1, addrspace 1)

    S_ENDPGM 0
...

# test4: both stores have 16 in the fifth operand (presumably the cache-policy
# immediate that carries the SCC bit), so the two DWORD stores at offsets 4 and
# 8 are expected to be merged into one DWORDX2 store that keeps the value 16.
# CHECK-LABEL: name: test4
# CHECK: BUFFER_STORE_DWORDX2_OFFSET killed %{{[0-9]+}}, %{{[0-9]+}}, 0, 4, 16, 0, implicit $exec :: (store (s64) into %ir.out.gep.1, align 4, addrspace 1)
---
name: test4
liveins:
  - { reg: '$sgpr0_sgpr1', virtual-reg: '' }
body: |
  bb.0 (%ir-block.0):
    liveins: $sgpr0_sgpr1

    ; Values to be stored.
    $vgpr0 = V_MOV_B32_e32 123, implicit $exec
    $vgpr1 = V_MOV_B32_e32 456, implicit $exec

    ; Upper two dwords of the 128-bit register used as the stores' second operand.
    $sgpr2 = S_MOV_B32 -1
    $sgpr3 = S_MOV_B32 61440

    ; Load a 64-bit value through the kernel's live-in pointer pair (%0) and
    ; use it as the low half of %4; each REG_SEQUENCE subregister index
    ; matches the width of the value placed there.
    %0:sgpr_64 = COPY $sgpr0_sgpr1
    %1:sgpr_64 = S_LOAD_DWORDX2_IMM %0, 36, 0 :: (dereferenceable invariant load (s64) from `ptr addrspace(4) undef`, addrspace 4)
    %2:sgpr_32 = COPY $sgpr2
    %3:sgpr_32 = COPY $sgpr3
    %4:sgpr_128 = REG_SEQUENCE %1, %subreg.sub0_sub1, %2, %subreg.sub2, %3, %subreg.sub3

    %5:vgpr_32 = COPY $vgpr0
    %6:vgpr_32 = COPY $vgpr1

    ; Adjacent dword stores (offsets 4 and 8), fifth operand 16 on both.
    BUFFER_STORE_DWORD_OFFSET %5, %4, 0, 4, 16, 0, implicit $exec :: (store (s32) into %ir.out.gep.1, addrspace 1)
    BUFFER_STORE_DWORD_OFFSET %6, %4, 0, 8, 16, 0, implicit $exec :: (store (s32) into %ir.out.gep.1, addrspace 1)

    S_ENDPGM 0
...