File: AMDGPUInsertSingleUseVDST.cpp

package info (click to toggle)
llvm-toolchain-18 1%3A18.1.8-18
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 1,908,340 kB
  • sloc: cpp: 6,667,937; ansic: 1,440,452; asm: 883,619; python: 230,549; objc: 76,880; f90: 74,238; lisp: 35,989; pascal: 16,571; sh: 10,229; perl: 7,459; ml: 5,047; awk: 3,523; makefile: 2,987; javascript: 2,149; xml: 892; fortran: 649; cs: 573
file content (122 lines) | stat: -rw-r--r-- 4,298 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
//===- AMDGPUInsertSingleUseVDST.cpp - Insert s_singleuse_vdst instructions ==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// Insert s_singleuse_vdst instructions on GFX11.5+ to mark regions of VALU
/// instructions that produce single-use VGPR values. If the value is forwarded
/// to the consumer instruction prior to VGPR writeback, the hardware can
/// then skip (kill) the VGPR write.
//
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "GCNSubtarget.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "SIInstrInfo.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/Register.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/MC/MCRegister.h"
#include "llvm/Pass.h"

using namespace llvm;

#define DEBUG_TYPE "amdgpu-insert-single-use-vdst"

namespace {
class AMDGPUInsertSingleUseVDST : public MachineFunctionPass {
private:
  const SIInstrInfo *SII;

public:
  static char ID;

  AMDGPUInsertSingleUseVDST() : MachineFunctionPass(ID) {}

  void emitSingleUseVDST(MachineInstr &MI) const {
    // Mark the following instruction as a single-use producer:
    //   s_singleuse_vdst { supr0: 1 }
    BuildMI(*MI.getParent(), MI, DebugLoc(), SII->get(AMDGPU::S_SINGLEUSE_VDST))
        .addImm(0x1);
  }

  bool runOnMachineFunction(MachineFunction &MF) override {
    const auto &ST = MF.getSubtarget<GCNSubtarget>();
    if (!ST.hasVGPRSingleUseHintInsts())
      return false;

    SII = ST.getInstrInfo();
    const auto *TRI = &SII->getRegisterInfo();
    bool InstructionEmitted = false;

    for (MachineBasicBlock &MBB : MF) {
      DenseMap<MCPhysReg, unsigned> RegisterUseCount; // TODO: MCRegUnits

      // Handle boundaries at the end of basic block separately to avoid
      // false positives. If they are live at the end of a basic block then
      // assume it has more uses later on.
      for (const auto &Liveouts : MBB.liveouts())
        RegisterUseCount[Liveouts.PhysReg] = 2;

      for (MachineInstr &MI : reverse(MBB.instrs())) {
        // All registers in all operands need to be single use for an
        // instruction to be marked as a single use producer.
        bool AllProducerOperandsAreSingleUse = true;

        for (const auto &Operand : MI.operands()) {
          if (!Operand.isReg())
            continue;
          const auto Reg = Operand.getReg();

          // Count the number of times each register is read.
          if (Operand.readsReg())
            RegisterUseCount[Reg]++;

          // Do not attempt to optimise across exec mask changes.
          if (MI.modifiesRegister(AMDGPU::EXEC, TRI)) {
            for (auto &UsedReg : RegisterUseCount)
              UsedReg.second = 2;
          }

          // If we are at the point where the register first became live,
          // check if the operands are single use.
          if (!MI.modifiesRegister(Reg, TRI))
            continue;
          if (RegisterUseCount[Reg] > 1)
            AllProducerOperandsAreSingleUse = false;
          // Reset uses count when a register is no longer live.
          RegisterUseCount.erase(Reg);
        }
        if (AllProducerOperandsAreSingleUse && SIInstrInfo::isVALU(MI)) {
          // TODO: Replace with candidate logging for instruction grouping
          // later.
          emitSingleUseVDST(MI);
          InstructionEmitted = true;
        }
      }
    }
    return InstructionEmitted;
  }
};
} // namespace

char AMDGPUInsertSingleUseVDST::ID = 0;

char &llvm::AMDGPUInsertSingleUseVDSTID = AMDGPUInsertSingleUseVDST::ID;

INITIALIZE_PASS(AMDGPUInsertSingleUseVDST, DEBUG_TYPE,
                "AMDGPU Insert SingleUseVDST", false, false)