File: GenXFloatControl.cpp

package info (click to toggle)
intel-graphics-compiler2 2.20.5-1
  • links: PTS, VCS
  • area: main
  • in suites: sid
  • size: 107,552 kB
  • sloc: cpp: 807,012; lisp: 287,936; ansic: 16,397; python: 4,010; yacc: 2,588; lex: 1,666; pascal: 313; sh: 186; makefile: 37
file content (153 lines) | stat: -rw-r--r-- 5,683 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
/*========================== begin_copyright_notice ============================

Copyright (C) 2024-2025 Intel Corporation

SPDX-License-Identifier: MIT

============================= end_copyright_notice ===========================*/

#include "GenX.h"
#include "GenXTargetMachine.h"
#include "GenXUtil.h"
#include "GenXVisa.h"

#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/InitializePasses.h"

#define DEBUG_TYPE "GENX_FLOAT_CONTROL"

namespace llvm {

/// Function pass that inserts cr0 (float control register) updates at
/// function entry and, for functions that are not group heads, restores the
/// previously read cr0 value before every return.
class GenXFloatControl : public FunctionPass {
public:
  /// Pass identifier; handed to the FunctionPass base by the constructor.
  static char ID;

  explicit GenXFloatControl() : FunctionPass(ID) {}

  StringRef getPassName() const override { return "GenXFloatControl"; }

  /// Requires TargetPassConfig (used to reach the GenX subtarget) and
  /// declares that all analyses are preserved.
  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<TargetPassConfig>();
    AU.setPreservesAll();
  }

  bool runOnFunction(Function &F) override;

private:
  /// Set of cr0 bits the pass is allowed to modify; recomputed at the start
  /// of every runOnFunction() call.
  uint32_t Mask = 0;

  /// Reads the float control attribute of \p F into \p Val.
  /// Returns true when the attribute is present.
  bool getFloatControl(Function &F, uint32_t *Val);

  /// Emits the cr0 update sequence before \p InsertBefore and returns the
  /// cr0 value read before the update.
  Value *buildCr0Update(uint32_t Value, Instruction *InsertBefore);

  /// Emits a write of \p V into cr0 before \p InsertBefore.
  void buildCr0Write(Value *V, Instruction *InsertBefore);
};

// Forward declaration of the pass registration hook; it is invoked from
// createGenXFloatControlPass() with the global PassRegistry.
// NOTE(review): the definition is presumably generated by the
// INITIALIZE_PASS_* macros further down in this file - confirm.
void initializeGenXFloatControlPass(PassRegistry &);

} // namespace llvm

using namespace llvm;
using namespace genx;
using namespace visa;

// Storage for the pass identifier that the GenXFloatControl constructor
// passes to the FunctionPass base class.
char GenXFloatControl::ID = 0;

// Pass registration: the pass is registered with both argument string and
// display name "GenXFloatControl", and TargetPassConfig is listed as a
// dependency (matching the addRequired<> call in getAnalysisUsage()).
// NOTE(review): the two trailing `false` flags are presumably LLVM's
// "CFG only" and "is analysis" switches - confirm against PassSupport.h.
INITIALIZE_PASS_BEGIN(GenXFloatControl, "GenXFloatControl", "GenXFloatControl",
                      false, false)
INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
INITIALIZE_PASS_END(GenXFloatControl, "GenXFloatControl", "GenXFloatControl",
                    false, false)

/// Factory for the GenXFloatControl pass.
///
/// Runs the pass registration hook against the global pass registry first,
/// then returns a newly allocated pass instance; the caller takes the raw
/// pointer.
FunctionPass *llvm::createGenXFloatControlPass() {
  PassRegistry &Registry = *PassRegistry::getPassRegistry();
  initializeGenXFloatControlPass(Registry);

  FunctionPass *NewPass = new GenXFloatControl();
  return NewPass;
}

/// Inserts float control (cr0) setup code into \p F.
///
/// The function is processed when it carries the float control attribute or
/// when fg::isHead() reports it as a head. The cr0 update is emitted at the
/// start of the entry block; for functions that are not group heads the cr0
/// value read on entry is additionally written back before every return.
///
/// \returns true when instructions were inserted, false otherwise.
bool GenXFloatControl::runOnFunction(Function &F) {
  // Denormal-retain bits for double, single and half precision.
  const uint32_t RetainDenormBits = CRBits::DoublePrecisionDenorm |
                                    CRBits::SinglePrecisionDenorm |
                                    CRBits::HalfPrecisionDenorm;

  // Only the rounding mode and the denorm modes may be specified through
  // the attribute by default.
  Mask = CRBits::RoundingBitMask | RetainDenormBits;

  // Default float control: round to nearest even, retain denormals.
  uint32_t Cr0Bits = CRBits::RTNE | RetainDenormBits;

  const auto &ST = getAnalysis<TargetPassConfig>()
                       .getTM<GenXTargetMachine>()
                       .getGenXSubtarget();

  // The attribute, when present, overrides the default. Functions that have
  // no attribute and are not heads are left untouched.
  const bool HasAttribute = getFloatControl(F, &Cr0Bits);
  if (!HasAttribute && !fg::isHead(F))
    return false;

  // Denormals are always retained in systolic operations when the subtarget
  // exposes a control bit for them.
  if (ST.hasSystolicDenormControl()) {
    Mask |= CRBits::SystolicDenorm;
    Cr0Bits |= CRBits::SystolicDenorm;
  }

  // cr0 is zero on kernel entry, so a zero float control needs no update.
  if (Cr0Bits == 0 && fg::isGroupHead(F))
    return false;

  // Kernels, stackcalls and subroutines with the attribute set their float
  // control on entry - either the attribute value or the default one.
  auto *EntryPoint = F.getEntryBlock().getFirstNonPHI();
  auto *CallerCr0 = buildCr0Update(Cr0Bits, EntryPoint);

  // Stackcalls and subroutines with the attribute must also restore the
  // caller's float control before returning; group heads need not.
  if (!fg::isGroupHead(F)) {
    for (auto &Block : F) {
      auto *Ret = dyn_cast<ReturnInst>(Block.getTerminator());
      if (Ret)
        buildCr0Write(CallerCr0, Ret);
    }
  }
  return true;
}

/// Fetches the float control value requested for \p F.
///
/// \param F   function to inspect for the FunctionMD::CMFloatControl
///            attribute.
/// \param Val receives the integer parsed from the attribute string (radix
///            auto-detected, as requested by the 0 radix argument).
/// \returns true when the attribute is present on \p F, false otherwise.
bool GenXFloatControl::getFloatControl(Function &F, uint32_t *Val) {
  if (F.hasFnAttribute(FunctionMD::CMFloatControl)) {
    const auto Attr = F.getFnAttribute(FunctionMD::CMFloatControl);
    const auto Text = Attr.getValueAsString();
    // NOTE(review): the parse status is intentionally not inspected, so the
    // presence of the attribute alone makes this function report success.
    (void)Text.getAsInteger(0, *Val);
    return true;
  }
  return false;
}

/// Emits, before \p InsertBefore, the sequence that applies \p Value to cr0.
///
/// The update is done in two read-modify-write steps on the predefined cr0
/// register: the first clears every bit covered by Mask, the second ORs in
/// the bits of \p Value that are covered by Mask.
///
/// \param Value        requested float control bits.
/// \param InsertBefore instruction in front of which the code is emitted.
/// \returns the i32 region read from cr0 by the first step, i.e. before any
///          modification was written.
Value *GenXFloatControl::buildCr0Update(uint32_t Value,
                                        Instruction *InsertBefore) {
  IRBuilder<> Builder(InsertBefore);
  auto &DbgLoc = InsertBefore->getDebugLoc();
  auto *Mod = InsertBefore->getModule();

  auto *I32Ty = Builder.getInt32Ty();
  auto *Cr0VecTy = IGCLLVM::FixedVectorType::get(I32Ty, 4);
  auto *Cr0Id = Builder.getInt32(PreDefined_Vars::PREDEFINED_CR0);
  Region Rgn(I32Ty);

  auto *ReadDecl = vc::getAnyDeclaration(
      Mod, GenXIntrinsic::genx_read_predef_reg, {Cr0VecTy, Cr0VecTy});
  auto *WriteDecl = vc::getAnyDeclaration(
      Mod, GenXIntrinsic::genx_write_predef_reg, {Cr0VecTy, Cr0VecTy});

  // Emits a fresh read of the whole cr0 vector.
  auto ReadCr0 = [&]() {
    return Builder.CreateCall(ReadDecl, {Cr0Id, UndefValue::get(Cr0VecTy)});
  };

  // Step 1: clear the bits owned by this pass.
  const uint32_t KeepBits = ~Mask;
  auto *Cr0BeforeClear = ReadCr0();
  auto *SavedCr0 =
      Rgn.createRdRegion(Cr0BeforeClear, "", InsertBefore, DbgLoc, true);
  auto *Cleared = Builder.CreateAnd(SavedCr0, KeepBits);
  auto *ClearedVec =
      Rgn.createWrRegion(Cr0BeforeClear, Cleared, "", InsertBefore, DbgLoc);
  Builder.CreateCall(WriteDecl, {Cr0Id, ClearedVec});

  // Step 2: set the requested bits, limited to the ones owned by this pass.
  const uint32_t SetBits = Value & Mask;
  auto *Cr0BeforeSet = ReadCr0();
  auto *CurrentCr0 =
      Rgn.createRdRegion(Cr0BeforeSet, "", InsertBefore, DbgLoc, true);
  auto *Updated = Builder.CreateOr(CurrentCr0, SetBits);
  auto *UpdatedVec =
      Rgn.createWrRegion(Cr0BeforeSet, Updated, "", InsertBefore, DbgLoc);
  Builder.CreateCall(WriteDecl, {Cr0Id, UpdatedVec});

  return SavedCr0;
}

/// Emits, before \p InsertBefore, code that stores \p V into cr0.
///
/// The predefined cr0 register is read as a 4 x i32 vector, \p V is written
/// into it through an i32 region, and the resulting vector is written back.
///
/// \param V            value to place into cr0 (runOnFunction() passes the
///                     value returned by buildCr0Update()).
/// \param InsertBefore instruction in front of which the code is emitted.
void GenXFloatControl::buildCr0Write(Value *V, Instruction *InsertBefore) {
  IRBuilder<> Builder(InsertBefore);
  auto &DbgLoc = InsertBefore->getDebugLoc();
  auto *Mod = InsertBefore->getModule();

  auto *I32Ty = Builder.getInt32Ty();
  auto *Cr0VecTy = IGCLLVM::FixedVectorType::get(I32Ty, 4);
  auto *Cr0Id = Builder.getInt32(PreDefined_Vars::PREDEFINED_CR0);
  Region Rgn(I32Ty);

  auto *ReadDecl = vc::getAnyDeclaration(
      Mod, GenXIntrinsic::genx_read_predef_reg, {Cr0VecTy, Cr0VecTy});
  auto *WriteDecl = vc::getAnyDeclaration(
      Mod, GenXIntrinsic::genx_write_predef_reg, {Cr0VecTy, Cr0VecTy});

  // Read the current cr0 vector, replace the region with V, write it back.
  auto *CurrentVec =
      Builder.CreateCall(ReadDecl, {Cr0Id, UndefValue::get(Cr0VecTy)});
  auto *RestoredVec =
      Rgn.createWrRegion(CurrentVec, V, "", InsertBefore, DbgLoc);
  Builder.CreateCall(WriteDecl, {Cr0Id, RestoredVec});
}