File: merge-load-store-vreg.mir

package info (click to toggle)
llvm-toolchain-7 1%3A7.0.1-8
  • links: PTS, VCS
  • area: main
  • in suites: buster
  • size: 733,456 kB
  • sloc: cpp: 3,776,651; ansic: 633,271; asm: 350,301; python: 142,716; objc: 107,612; sh: 22,626; lisp: 11,056; perl: 7,999; pascal: 6,742; ml: 5,537; awk: 3,536; makefile: 2,557; cs: 2,027; xml: 841; ruby: 156
file content (60 lines) | stat: -rw-r--r-- 2,254 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
# RUN: llc -march=amdgcn -mcpu=gfx803 -verify-machineinstrs -run-pass si-load-store-opt -o - %s | FileCheck -check-prefixes=GCN,VI %s
# RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs -run-pass si-load-store-opt -o - %s | FileCheck -check-prefixes=GCN,GFX9 %s

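# If the merged access needs a base offset, check that SILoadStoreOptimizer
# materializes it with V_ADD_{I|U}32_e64 rather than an _e32 add. The
# carry-producing _e32 add implicitly writes its carry to $vcc, whereas the
# _e64 form either names an explicit carry destination (a fresh vreg) or, for
# the no-carry add expected on gfx900, has no carry output at all. Either way
# $vcc, which is live across the DS instructions in this test, must not be
# inadvertently clobbered.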

# GCN-LABEL: name: kernel

# VI: V_ADD_I32_e64 %6, %0,
# VI-NEXT: DS_WRITE2_B32 killed %7, %0, %3, 0, 8,
# VI: V_ADD_I32_e64 %10, %3,
# VI-NEXT: DS_READ2_B32 killed %11, 0, 8,

# GFX9: V_ADD_U32_e64 %6, %0,
# GFX9-NEXT: DS_WRITE2_B32_gfx9 killed %7, %0, %3, 0, 8,
# GFX9: V_ADD_U32_e64 %9, %3,
# GFX9-NEXT: DS_READ2_B32_gfx9 killed %10, 0, 8,

--- |
  ; Embedded LLVM IR module. It supplies the IR values (%tmp, %tmp1, %tmp2,
  ; %tmp3) that the memory operands of the machine function 'kernel' refer to
  ; as %ir.tmp, %ir.tmp1, %ir.tmp2 and %ir.tmp3.

  ; 256-element float array in address space 3; every pointer below is derived
  ; from it.
  @0 = internal unnamed_addr addrspace(3) global [256 x float] undef, align 4

  define amdgpu_kernel void @kernel() {
    ; Entry block: branches straight to the block that defines the pointers.
    bb.0:
      br label %bb2

    ; Exit block.
    bb1:
      ret void

    ; %tmp points at element 0 of @0; %tmp1, %tmp2 and %tmp3 point 8, 16 and
    ; 24 floats past %tmp. None of them is used by any IR instruction here.
    bb2:
      %tmp = getelementptr inbounds [256 x float], [256 x float] addrspace(3)* @0, i32 0, i32 0
      %tmp1 = getelementptr inbounds float, float addrspace(3)* %tmp, i32 8
      %tmp2 = getelementptr inbounds float, float addrspace(3)* %tmp, i32 16
      %tmp3 = getelementptr inbounds float, float addrspace(3)* %tmp, i32 24
      br label %bb1
    }
---
# Machine function under test. bb.2 defines $vcc, then issues a pair of
# DS_WRITE_B32 and a pair of DS_READ_B32 whose offsets are 32 apart, and only
# afterwards consumes $vcc. The check lines near the top of the file expect each
# pair to be merged into a single two-element DS instruction.
name:            kernel
body:             |
  bb.0:
    ; %0 is left undefined; it is reused as an operand of the compare and of
    ; both stores in bb.2.
    %0:vgpr_32 = IMPLICIT_DEF
    S_BRANCH %bb.2

  bb.1:
    S_ENDPGM

  bb.2:
    ; Set up $vcc before the DS instructions: the _e32 compare carries an
    ; implicit-def of $vcc.
    %1:sreg_64_xexec = V_CMP_NE_U32_e64 %0, 0, implicit $exec
    %2:vgpr_32 = V_CNDMASK_B32_e64 0, 1, %1, implicit $exec
    V_CMP_NE_U32_e32 1, %2, implicit-def $vcc, implicit $exec
    ; Store pair: both take %0 as their first operand and differ in offset
    ; (1024 and 1056). The expected output is one DS_WRITE2_B32 with element
    ; offsets 0 and 8 whose address is produced by a V_ADD_*_e64 on %0.
    DS_WRITE_B32 %0, %0, 1024, 0, implicit $m0, implicit $exec :: (store 4 into %ir.tmp)
    ; %3 is the second operand of the second store and the first operand of
    ; both loads.
    %3:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
    DS_WRITE_B32 %0, %3, 1056, 0, implicit $m0, implicit $exec :: (store 4 into %ir.tmp1)
    ; Load pair: same first operand (%3), offsets 1088 and 1120. The expected
    ; output is one DS_READ2_B32 with element offsets 0 and 8 whose address is
    ; produced by a V_ADD_*_e64 on %3.
    %4:vgpr_32 = DS_READ_B32 %3, 1088, 0, implicit $m0, implicit $exec :: (load 4 from %ir.tmp2)
    %5:vgpr_32 = DS_READ_B32 %3, 1120, 0, implicit $m0, implicit $exec :: (load 4 from %ir.tmp3)
    ; $vcc is read here, after the DS instructions, so anything inserted above
    ; that overwrote $vcc would change the value consumed by these two
    ; instructions. Both branches below target %bb.1.
    $vcc = S_AND_B64 $exec, $vcc, implicit-def $scc
    S_CBRANCH_VCCNZ %bb.1, implicit $vcc
    S_BRANCH %bb.1
...