File: virtregrewrite-undef-identity-copy.mir

package info (click to toggle)
llvm-toolchain-17 1%3A17.0.6-22
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 1,799,624 kB
  • sloc: cpp: 6,428,607; ansic: 1,383,196; asm: 793,408; python: 223,504; objc: 75,364; f90: 60,502; lisp: 33,869; pascal: 15,282; sh: 9,684; perl: 7,453; ml: 4,937; awk: 3,523; makefile: 2,889; javascript: 2,149; xml: 888; fortran: 619; cs: 573
file content (68 lines) | stat: -rw-r--r-- 4,274 bytes parent folder | download | duplicates (6)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple=amdgcn-amd-amdhsa -start-before=greedy,0 -stop-after=virtregrewriter,1 -verify-machineinstrs -o - %s | FileCheck %s

# The undef source %4 of the copy into $vgpr3 is itself allocated to
# $vgpr3, which makes the copy an identity copy. That identity copy was
# deleted, and $vgpr3 was then considered undef. The code that replaces
# an undef copy with a KILL was incorrectly checking the dest operand
# rather than the source operand.

--- |
  ; Stub IR definition backing the MIR function of the same name in the
  ; next YAML document; the IR body itself is just a bare return.
  define amdgpu_kernel void @undef_identity_copy() {
    ret void
  }

  ; Declarations for the symbols referenced by the SI_PC_ADD_REL_OFFSET and
  ; SI_CALL instructions in the MIR body.
  declare hidden float @bar(<4 x float>)
  declare hidden void @foo()

...
---
# Machine function under test. It calls @foo, then calls @bar with
# $vgpr0-$vgpr3 as implicit uses, where $vgpr3 is fed by a copy from an
# undef virtual register.
name:            undef_identity_copy
tracksRegLiveness: true
frameInfo:
  maxAlignment:    4
  hasCalls:        true
machineFunctionInfo:
  isEntryFunction: true
  scratchRSrcReg:  '$sgpr0_sgpr1_sgpr2_sgpr3'
  frameOffsetReg:  '$sgpr95'
  stackPtrOffsetReg: '$sgpr32'
# The body starts with the autogenerated assertions describing the expected
# output, followed by the input MIR written with virtual registers.
body:             |
  bb.0:
    ; CHECK-LABEL: name: undef_identity_copy
    ; CHECK: renamable $vgpr40_vgpr41_vgpr42_vgpr43 = FLAT_LOAD_DWORDX4 undef renamable $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr :: (load (s128), addrspace 1)
    ; CHECK-NEXT: renamable $sgpr6_sgpr7 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-rel32-lo) @foo + 4, target-flags(amdgpu-rel32-hi) @foo + 4, implicit-def dead $scc
    ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $sgpr32, implicit $sgpr32, implicit $sgpr95, implicit-def $scc
    ; CHECK-NEXT: $sgpr4 = COPY $sgpr95
    ; CHECK-NEXT: dead $sgpr30_sgpr31 = SI_CALL killed renamable $sgpr6_sgpr7, @foo, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4
    ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 4, implicit-def $scc, implicit-def $sgpr32, implicit $sgpr32, implicit $sgpr95
    ; CHECK-NEXT: renamable $sgpr6_sgpr7 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-rel32-lo) @bar + 4, target-flags(amdgpu-rel32-hi) @bar + 4, implicit-def dead $scc
    ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc, implicit-def $sgpr32, implicit $sgpr32, implicit $sgpr95
    ; CHECK-NEXT: $sgpr4 = COPY $sgpr95
    ; CHECK-NEXT: $vgpr0 = COPY renamable $vgpr40
    ; CHECK-NEXT: $vgpr1 = COPY renamable $vgpr41
    ; CHECK-NEXT: $vgpr2 = COPY killed renamable $vgpr42
    ; CHECK-NEXT: $vgpr3 = KILL undef renamable $vgpr3
    ; CHECK-NEXT: dead $sgpr30_sgpr31 = SI_CALL killed renamable $sgpr6_sgpr7, @bar, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4, implicit $vgpr0, implicit killed $vgpr1, implicit killed $vgpr2, implicit killed $vgpr3, implicit-def $vgpr0
    ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 4, implicit-def $scc, implicit-def $sgpr32, implicit $sgpr32, implicit $sgpr95
    ; CHECK-NEXT: FLAT_STORE_DWORD undef renamable $vgpr0_vgpr1, killed renamable $vgpr0, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1)
    ; CHECK-NEXT: S_ENDPGM 0
    ; Input MIR. %0 is defined here and read again after the call to @foo;
    ; the assertions above expect it in $vgpr40_vgpr41_vgpr42_vgpr43.
    %0:vreg_128 = FLAT_LOAD_DWORDX4 undef %1:vreg_64, 0, 0, implicit $exec, implicit $flat_scr :: (load (s128), addrspace 1)
    %2:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-rel32-lo) @foo + 4, target-flags(amdgpu-rel32-hi) @foo + 4, implicit-def dead $scc
    ADJCALLSTACKUP 0, 0, implicit-def $sgpr32, implicit $sgpr32, implicit $sgpr95, implicit-def $scc
    $sgpr4 = COPY $sgpr95
    dead $sgpr30_sgpr31 = SI_CALL %2, @foo, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4
    ADJCALLSTACKDOWN 0, 4, implicit-def $scc, implicit-def $sgpr32, implicit $sgpr32, implicit $sgpr95
    %3:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-rel32-lo) @bar + 4, target-flags(amdgpu-rel32-hi) @bar + 4, implicit-def dead $scc
    ADJCALLSTACKUP 0, 0, implicit-def $scc, implicit-def $sgpr32, implicit $sgpr32, implicit $sgpr95
    $sgpr4 = COPY $sgpr95
    ; Copies into $vgpr0-$vgpr3, which the call to @bar below lists as
    ; implicit uses.
    $vgpr0 = COPY %0.sub0
    $vgpr1 = COPY %0.sub1
    $vgpr2 = COPY %0.sub2
    ; Copy under test: the source %4 is undef. The expected output above has
    ; "$vgpr3 = KILL undef renamable $vgpr3" in this position, i.e. the copy
    ; is replaced by a KILL rather than dropped.
    $vgpr3 = COPY undef %4:vgpr_32
    dead $sgpr30_sgpr31 = SI_CALL %3, @bar, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4, implicit $vgpr0, implicit killed $vgpr1, implicit killed $vgpr2, implicit killed $vgpr3, implicit-def $vgpr0
    ; The expected output has no instruction for this copy and stores
    ; $vgpr0 directly.
    %5:vgpr_32 = COPY $vgpr0
    ADJCALLSTACKDOWN 0, 4, implicit-def $scc, implicit-def $sgpr32, implicit $sgpr32, implicit $sgpr95
    FLAT_STORE_DWORD undef %6:vreg_64, %5, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1)
    S_ENDPGM 0

...