File: insert-waitcnts-exp.mir

package info (click to toggle)
llvm-toolchain-19 1%3A19.1.7-3
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 1,998,520 kB
  • sloc: cpp: 6,951,680; ansic: 1,486,157; asm: 913,598; python: 232,024; f90: 80,126; objc: 75,281; lisp: 37,276; pascal: 16,990; sh: 10,009; ml: 5,058; perl: 4,724; awk: 3,523; makefile: 3,167; javascript: 2,504; xml: 892; fortran: 664; cs: 573
file content (63 lines) | stat: -rw-r--r-- 2,590 bytes parent folder | download | duplicates (5)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
# RUN: llc -mtriple=amdgcn -verify-machineinstrs -run-pass si-insert-waitcnts -o - %s | FileCheck %s
--- |
  define amdgpu_ps <4 x float> @exp_done_waitcnt(<4 x i32> inreg, <4 x
  i32> inreg, i32 inreg %w, float %v) #0 {
    %a = load volatile float, ptr addrspace(1) undef
    %b = load volatile float, ptr addrspace(1) undef
    %c = load volatile float, ptr addrspace(1) undef
    %d = load volatile float, ptr addrspace(1) undef
    call void @llvm.amdgcn.exp.f32(i32 15, i32 1, float %a, float %b, float %c, float %d, i1 true, i1 false)
    ret <4 x float> <float 5.000000e-01, float 1.000000e+00, float 2.000000e+00, float 4.000000e+00>
  }

  declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #0

  attributes #0 = { nounwind }

...
---

# CHECK-LABEL: name: exp_done_waitcnt{{$}}
# CHECK: EXP_DONE
# CHECK-NEXT: S_WAITCNT 3855
# CHECK: $vgpr0 = V_MOV_B32
# CHECK: $vgpr1 = V_MOV_B32
# CHECK: $vgpr2 = V_MOV_B32
# CHECK: $vgpr3 = V_MOV_B32
name:            exp_done_waitcnt
alignment:       1
exposesReturnsTwice: false
legalized:       false
regBankSelected: false
selected:        false
tracksRegLiveness: true
frameInfo:
  isFrameAddressTaken: false
  isReturnAddressTaken: false
  hasStackMap:     false
  hasPatchPoint:   false
  stackSize:       0
  offsetAdjustment: 0
  maxAlignment:    0
  adjustsStack:    false
  hasCalls:        false
  maxCallFrameSize: 0
  hasOpaqueSPAdjustment: false
  hasVAStart:      false
  hasMustTailInVarArgFunc: false
body:             |
  bb.0 (%ir-block.2):
    $sgpr3 = S_MOV_B32 61440
    $sgpr2 = S_MOV_B32 -1
    $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (volatile load (s32) from `ptr addrspace(1) undef`)
    $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (volatile load (s32) from `ptr addrspace(1) undef`)
    $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (volatile load (s32) from `ptr addrspace(1) undef`)
    $vgpr3 = BUFFER_LOAD_DWORD_OFFSET killed $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (volatile load (s32) from `ptr addrspace(1) undef`)
    EXP_DONE 0, killed $vgpr0, killed $vgpr1, killed $vgpr2, killed $vgpr3, -1, -1, 15, implicit $exec
    $vgpr0 = V_MOV_B32_e32 1056964608, implicit $exec
    $vgpr1 = V_MOV_B32_e32 1065353216, implicit $exec
    $vgpr2 = V_MOV_B32_e32 1073741824, implicit $exec
    $vgpr3 = V_MOV_B32_e32 1082130432, implicit $exec
    SI_RETURN_TO_EPILOG killed $vgpr0, killed $vgpr1, killed $vgpr2, killed $vgpr3

...