1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148
|
//===--- AMDGPUExportClustering.cpp - AMDGPU Export Clustering ------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file This file contains a DAG scheduling mutation to cluster shader
/// exports.
//
//===----------------------------------------------------------------------===//
#include "AMDGPUExportClustering.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "SIInstrInfo.h"
#include "llvm/CodeGen/ScheduleDAGInstrs.h"
using namespace llvm;
namespace {
/// Scheduling-DAG mutation that groups shader export instructions into a
/// single cluster. All of the work happens in apply().
class ExportClustering : public ScheduleDAGMutation {
public:
  ExportClustering() = default;

  /// Rewrite the edges of \p DAG so that its export instructions are
  /// chained together.
  void apply(ScheduleDAGInstrs *DAG) override;
};
static bool isExport(const SUnit &SU) {
return SIInstrInfo::isEXP(*SU.getInstr());
}
static bool isPositionExport(const SIInstrInfo *TII, SUnit *SU) {
const MachineInstr *MI = SU->getInstr();
unsigned Imm = TII->getNamedOperand(*MI, AMDGPU::OpName::tgt)->getImm();
return Imm >= AMDGPU::Exp::ET_POS0 && Imm <= AMDGPU::Exp::ET_POS_LAST;
}
static void sortChain(const SIInstrInfo *TII, SmallVector<SUnit *, 8> &Chain,
unsigned PosCount) {
if (!PosCount || PosCount == Chain.size())
return;
// Position exports should occur as soon as possible in the shader
// for optimal performance. This moves position exports before
// other exports while preserving the order within different export
// types (pos or other).
SmallVector<SUnit *, 8> Copy(Chain);
unsigned PosIdx = 0;
unsigned OtherIdx = PosCount;
for (SUnit *SU : Copy) {
if (isPositionExport(TII, SU))
Chain[PosIdx++] = SU;
else
Chain[OtherIdx++] = SU;
}
}
/// Link the exports in \p Exports, in the given order, into one cluster.
///
/// For every adjacent pair (SUa, SUb) a barrier edge and a cluster edge are
/// added from SUa to SUb. In addition, every non-weak, non-export predecessor
/// of SUb becomes an artificial predecessor of the first export in the list.
///
/// \param Exports Exports in their final order. Lists with fewer than two
///                entries are left untouched.
/// \param DAG     Scheduling DAG that receives the new edges.
static void buildCluster(ArrayRef<SUnit *> Exports, ScheduleDAGInstrs *DAG) {
  // A cluster needs at least two members. Returning early also keeps an
  // empty list away from front() and stops the unsigned `size() - 1` loop
  // bound below from wrapping around.
  if (Exports.size() < 2)
    return;

  SUnit *ChainHead = Exports.front();

  // Now construct cluster from chain by adding new edges.
  for (unsigned Idx = 0, End = Exports.size() - 1; Idx < End; ++Idx) {
    SUnit *SUa = Exports[Idx];
    SUnit *SUb = Exports[Idx + 1];

    // Copy all dependencies to the head of the chain to avoid any
    // computation being inserted into the chain.
    for (const SDep &Pred : SUb->Preds) {
      SUnit *PredSU = Pred.getSUnit();
      if (!isExport(*PredSU) && !Pred.isWeak())
        DAG->addEdge(ChainHead, SDep(PredSU, SDep::Artificial));
    }

    // New barrier edge ordering exports
    DAG->addEdge(SUb, SDep(SUa, SDep::Barrier));
    // Also add cluster edge
    DAG->addEdge(SUb, SDep(SUa, SDep::Cluster));
  }
}
/// Drop every barrier edge that makes \p SU depend on an export.
///
/// When \p SU is not itself an export, each dropped edge is compensated for:
/// the non-export barrier predecessors of the dropped export become barrier
/// predecessors of \p SU, so the ordering they imposed is retained.
///
/// \param DAG Scheduling DAG used to insert the replacement edges.
/// \param SU  Scheduling unit whose predecessor list is rewritten.
static void removeExportDependencies(ScheduleDAGInstrs *DAG, SUnit &SU) {
  // Edits are collected first and applied afterwards, so SU.Preds is never
  // modified while it is being walked.
  SmallVector<SDep, 2> Stale;
  SmallVector<SDep, 2> Replacements;

  for (const SDep &Dep : SU.Preds) {
    SUnit *Producer = Dep.getSUnit();
    if (!Dep.isBarrier() || !isExport(*Producer))
      continue;

    Stale.push_back(Dep);

    // Exports get their mutual ordering from edges added elsewhere, so no
    // replacement edges are collected for them.
    if (isExport(SU))
      continue;

    // Removing a barrier means the producer's own barrier dependencies have
    // to be inherited to maintain order.
    for (const SDep &Inherited : Producer->Preds) {
      SUnit *Ancestor = Inherited.getSUnit();
      if (!Inherited.isBarrier() || isExport(*Ancestor))
        continue;
      Replacements.push_back(SDep(Ancestor, SDep::Barrier));
    }
  }

  for (const SDep &Dep : Stale)
    SU.removePred(Dep);

  for (const SDep &Dep : Replacements)
    DAG->addEdge(&SU, Dep);
}
/// Collect every export in \p DAG, strip the barrier edges that tie other
/// nodes to exports, and then, if more than one export was found, order the
/// exports (position exports first) and chain them into a cluster.
void ExportClustering::apply(ScheduleDAGInstrs *DAG) {
  const SIInstrInfo *TII = static_cast<const SIInstrInfo *>(DAG->TII);

  SmallVector<SUnit *, 8> Exports;
  unsigned NumPositionExports = 0;

  // Walk the DAG gathering the exports and removing the barrier edges that
  // create dependencies on them. Freeing exports of successor edges allows
  // more scheduling freedom, and nothing should be order dependent on
  // exports. Edges ordering the exports themselves are added afterwards.
  for (SUnit &Node : DAG->SUnits) {
    if (!isExport(Node))
      continue;

    Exports.push_back(&Node);
    if (isPositionExport(TII, &Node))
      ++NumPositionExports;

    removeExportDependencies(DAG, Node);

    // Iterate over a snapshot of the successor list rather than the list
    // itself; presumably the edge removal below can modify Node.Succs.
    SmallVector<SDep, 4> SuccSnapshot(Node.Succs.begin(), Node.Succs.end());
    for (const SDep &Edge : SuccSnapshot)
      removeExportDependencies(DAG, *Edge.getSUnit());
  }

  // Clustering only makes sense when there are multiple exports.
  if (Exports.size() <= 1)
    return;

  sortChain(TII, Exports, NumPositionExports);
  buildCluster(Exports, DAG);
}
} // end namespace
namespace llvm {
/// Create a new export-clustering mutation and hand ownership to the caller.
std::unique_ptr<ScheduleDAGMutation> createAMDGPUExportClusteringDAGMutation() {
  std::unique_ptr<ScheduleDAGMutation> Mutation =
      std::make_unique<ExportClustering>();
  return Mutation;
}
} // end namespace llvm
|