File: vcmp-saveexec-to-vcmpx.mir

package info (click to toggle)
llvm-toolchain-19 1%3A19.1.7-3
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 1,998,520 kB
  • sloc: cpp: 6,951,680; ansic: 1,486,157; asm: 913,598; python: 232,024; f90: 80,126; objc: 75,281; lisp: 37,276; pascal: 16,990; sh: 10,009; ml: 5,058; perl: 4,724; awk: 3,523; makefile: 3,167; javascript: 2,504; xml: 892; fortran: 664; cs: 573
file content (89 lines) | stat: -rw-r--r-- 3,985 bytes parent folder | download | duplicates (4)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
# RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -mattr=-wavefrontsize32,+wavefrontsize64 -run-pass=si-optimize-exec-masking -verify-machineinstrs %s -o - | FileCheck --check-prefixes=GCN,GFX1010 %s
# RUN: llc -mtriple=amdgcn -mcpu=gfx1030 -mattr=-wavefrontsize32,+wavefrontsize64 -run-pass=si-optimize-exec-masking -verify-machineinstrs %s -o - | FileCheck --check-prefixes=GCN,GFX1030 %s
---

# After the Optimize exec masking (post-RA) pass, there's a chance of having v_cmpx instructions
# being introduced whenever there's a sequence of v_cmp and s_and_saveexec instructions
# which can be safely replaced in various cases.
# However, it is not safe to do so when the generated code sequence would omit part of the EXEC mask
# which could occur when a subset of EXEC is used as input operand in the v_cmp instruction.
# The idea behind this test is to check if the subregisters are correctly handled here.

# GCN-LABEL: name: vcmp_saveexec_to_mov_vcmpx_exec_subreg
# GCN: V_CMP_GT_U32_e64
# GCN: S_AND_SAVEEXEC_B64
name: vcmp_saveexec_to_mov_vcmpx_exec_subreg
# Input shape: a V_CMP that writes $sgpr0_sgpr1, followed by the expanded
# save-exec sequence (COPY of $exec, S_AND_B64 with the compare result,
# S_MOV_B64_term back into $exec).
# The V_CMP reads $sgpr2 as a source operand, and the COPY of $exec that
# follows writes the pair $sgpr2_sgpr3, so the compare's source register
# overlaps the register that receives the saved exec mask.
# The check lines preceding this function expect both the V_CMP and an
# S_AND_SAVEEXEC_B64 in the output of both RUN lines.
tracksRegLiveness: true
body: |
  bb.0:
    liveins: $vgpr0, $sgpr2
    renamable $sgpr0_sgpr1 = V_CMP_GT_U32_e64 $sgpr2, killed $vgpr0, implicit $exec
    $sgpr2_sgpr3 = COPY $exec, implicit-def $exec
    $sgpr2_sgpr3 = S_AND_B64 killed renamable $sgpr2_sgpr3, killed renamable $sgpr0_sgpr1, implicit-def dead $scc
    $exec = S_MOV_B64_term killed renamable $sgpr2_sgpr3
...

---

# Ensure the transformation does not get applied when the v_cmp target is used before the s_and_saveexec instruction.

# GCN-LABEL: name: vcmp_saveexec_to_mov_vcmpx_exec_intermediate_use
# GCN: V_CMP_LT_F32_e64
# GCN: V_WRITELANE_B32
# GCN: S_AND_SAVEEXEC_B64
name: vcmp_saveexec_to_mov_vcmpx_exec_intermediate_use
# Input shape: a V_CMP that writes $sgpr0_sgpr1, then a V_WRITELANE_B32 that
# reads $sgpr0 (part of the compare result) before the expanded save-exec
# sequence (COPY of $exec, S_AND_B64, S_MOV_B64_term into $exec).
# The check lines preceding this function expect the V_CMP, the
# V_WRITELANE_B32 and an S_AND_SAVEEXEC_B64, in that order, in the output of
# both RUN lines.
tracksRegLiveness: true
body: |
  bb.0:
    liveins: $vgpr0, $vgpr1, $sgpr2
    renamable $sgpr0_sgpr1 = V_CMP_LT_F32_e64 0, 953267991, 2, $vgpr1, 0, implicit $mode, implicit $exec
    $vgpr0 = V_WRITELANE_B32 0, $sgpr0, $vgpr0
    $sgpr2_sgpr3 = COPY $exec, implicit-def $exec
    $sgpr2_sgpr3 = S_AND_B64 killed renamable $sgpr2_sgpr3, killed renamable $sgpr0_sgpr1, implicit-def dead $scc
    $exec = S_MOV_B64_term killed renamable $sgpr2_sgpr3
...

---

# Check if the modifiers are preserved when generating the V_CMPX instruction.

# GCN-LABEL: name: vcmp_saveexec_to_mov_vcmpx_check_abs
# GFX1010: V_CMP_LT_F32_e64
# GFX1010: S_AND_SAVEEXEC_B64
# GFX1030: S_MOV_B64
# GFX1030-NEXT: V_CMPX_LT_F32_nosdst_e64 0, 953267991, 2
name: vcmp_saveexec_to_mov_vcmpx_check_abs
# Input shape: a V_CMP_LT_F32_e64 that writes $sgpr0_sgpr1, immediately
# followed by the expanded save-exec sequence (COPY of $exec, S_AND_B64,
# S_MOV_B64_term into $exec), with nothing in between.
# The compare's leading operands are `0, 953267991, 2, $vgpr0`; the gfx1030
# check line expects the same `0, 953267991, 2` prefix on the generated
# V_CMPX_LT_F32_nosdst_e64, directly after an S_MOV_B64.
# NOTE(review): presumably the `2` ahead of $vgpr0 is the source-modifier
# operand encoding abs (per the test name) - confirm against the AMDGPU
# source-modifier definitions.
# The gfx1010 check lines instead expect the V_CMP and an S_AND_SAVEEXEC_B64.
tracksRegLiveness: true
body: |
  bb.0:
    liveins: $vgpr0
    renamable $sgpr0_sgpr1 = V_CMP_LT_F32_e64 0, 953267991, 2, $vgpr0, 0, implicit $mode, implicit $exec
    $sgpr2_sgpr3 = COPY $exec, implicit-def $exec
    $sgpr2_sgpr3 = S_AND_B64 killed renamable $sgpr2_sgpr3, killed renamable $sgpr0_sgpr1, implicit-def dead $scc
    $exec = S_MOV_B64_term killed renamable $sgpr2_sgpr3
...

---

# Check if the sequence will be optimized even with more than 5 (unrelated) instructions in between the v_cmp and s_and_saveexec.

# GCN-LABEL: name: vcmp_saveexec_to_mov_vcmpx_check_many_instrs
# GFX1010: V_CMP_LT_F32_e64
# GFX1010: S_AND_SAVEEXEC_B64
# GFX1030: S_MOV_B64
# GFX1030: V_CMPX_LT_F32_nosdst_e64 0, 953267991, 2
name: vcmp_saveexec_to_mov_vcmpx_check_many_instrs
# Input shape: a V_CMP_LT_F32_e64 that writes $sgpr0_sgpr1, then six
# identical V_WRITELANE_B32 instructions, then the expanded save-exec
# sequence (COPY of $exec, S_AND_B64, S_MOV_B64_term into $exec).
# The six V_WRITELANE_B32 instructions read $sgpr2 and read/write $vgpr1
# only; none of them names $sgpr0_sgpr1, the compare result.
# The gfx1030 check lines expect an S_MOV_B64 and a
# V_CMPX_LT_F32_nosdst_e64 carrying the `0, 953267991, 2` operand prefix;
# the gfx1010 check lines expect the V_CMP and an S_AND_SAVEEXEC_B64.
tracksRegLiveness: true
body: |
  bb.0:
    liveins: $vgpr0, $sgpr2, $vgpr1
    renamable $sgpr0_sgpr1 = V_CMP_LT_F32_e64 0, 953267991, 2, $vgpr0, 0, implicit $mode, implicit $exec
    $vgpr1 = V_WRITELANE_B32 0, $sgpr2, $vgpr1
    $vgpr1 = V_WRITELANE_B32 0, $sgpr2, $vgpr1
    $vgpr1 = V_WRITELANE_B32 0, $sgpr2, $vgpr1
    $vgpr1 = V_WRITELANE_B32 0, $sgpr2, $vgpr1
    $vgpr1 = V_WRITELANE_B32 0, $sgpr2, $vgpr1
    $vgpr1 = V_WRITELANE_B32 0, $sgpr2, $vgpr1
    $sgpr2_sgpr3 = COPY $exec, implicit-def $exec
    $sgpr2_sgpr3 = S_AND_B64 killed renamable $sgpr2_sgpr3, killed renamable $sgpr0_sgpr1, implicit-def dead $scc
    $exec = S_MOV_B64_term killed renamable $sgpr2_sgpr3