File: CorrectlyRoundedDivSqrt.cpp

package info (click to toggle)
intel-graphics-compiler2 2.16.0-2
  • links: PTS, VCS
  • area: main
  • in suites: sid
  • size: 106,644 kB
  • sloc: cpp: 805,640; lisp: 287,672; ansic: 16,414; python: 3,952; yacc: 2,588; lex: 1,666; pascal: 313; sh: 186; makefile: 35
file content (127 lines) | stat: -rw-r--r-- 4,171 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
/*========================== begin_copyright_notice ============================

Copyright (C) 2017-2021 Intel Corporation

SPDX-License-Identifier: MIT

============================= end_copyright_notice ===========================*/

#include "Compiler/Optimizer/OpenCLPasses/CorrectlyRoundedDivSqrt/CorrectlyRoundedDivSqrt.hpp"
#include "Compiler/IGCPassSupport.h"

#include "llvmWrapper/IR/DerivedTypes.h"
#include "common/LLVMWarningsPush.hpp"
#include <llvm/IR/Function.h>
#include <llvm/IR/Instructions.h>
#include <llvm/IR/IRBuilder.h>
#include "common/LLVMWarningsPop.hpp"

using namespace llvm;
using namespace IGC;
using namespace IGC::IGCMD;

// Register pass to igc-opt
//
// The four macros below are the arguments handed to the IGC_INITIALIZE_PASS_*
// registration macros: the pass flag string, a human-readable description,
// and the CFG-only / analysis booleans (both false for this pass).
#define PASS_FLAG "igc-correctly-rounded-div-sqrt"
#define PASS_DESCRIPTION "Ensures single precision divide and sqrt are correctly rounded"
#define PASS_CFG_ONLY false
#define PASS_ANALYSIS false
IGC_INITIALIZE_PASS_BEGIN(CorrectlyRoundedDivSqrt, PASS_FLAG, PASS_DESCRIPTION, PASS_CFG_ONLY, PASS_ANALYSIS)
// runOnModule() queries MetaDataUtilsWrapper through getAnalysis<>(), so it is
// declared as a dependency of this pass.
IGC_INITIALIZE_PASS_DEPENDENCY(MetaDataUtilsWrapper)
IGC_INITIALIZE_PASS_END(CorrectlyRoundedDivSqrt, PASS_FLAG, PASS_DESCRIPTION, PASS_CFG_ONLY, PASS_ANALYSIS)

// Pass identifier handed to the ModulePass base-class constructor by both
// constructors of this class.
char CorrectlyRoundedDivSqrt::ID = 0;

// Default construction: correctly-rounded lowering is not forced and the half
// type is treated as unavailable. Delegates to the two-argument constructor,
// which initializes the base class and members and registers the pass.
CorrectlyRoundedDivSqrt::CorrectlyRoundedDivSqrt()
    : CorrectlyRoundedDivSqrt(/*forceCR=*/false, /*HasHalf=*/false) {}

// Constructs the pass with explicit settings.
//   forceCR - stored in m_forceCR; when true, runOnModule() does its work even
//             if the module metadata does not request correct rounding.
//   HasHalf - stored in m_hasHalfTy; when true, runOnModule() leaves function
//             declarations untouched.
// m_IsCorrectlyRounded starts out false and is refreshed from module metadata
// on every runOnModule() call.
CorrectlyRoundedDivSqrt::CorrectlyRoundedDivSqrt(bool forceCR, bool HasHalf)
    : ModulePass(ID),
      m_forceCR(forceCR),
      m_hasHalfTy(HasHalf),
      m_IsCorrectlyRounded(false) {
  PassRegistry &Registry = *PassRegistry::getPassRegistry();
  initializeCorrectlyRoundedDivSqrtPass(Registry);
}

// Pass entry point.
//
// Does nothing (and reports "unchanged") unless correct rounding was requested
// either by the module's compile options or by the m_forceCR constructor flag.
// Otherwise every function in the module is examined: declarations are handed
// to processDeclaration() (skipped entirely when m_hasHalfTy is set), and
// function bodies are walked with visit(), which is where visitFDiv() gets its
// chance to rewrite fdiv instructions.
//
// Returns true if any declaration was renamed or any instruction replaced.
bool CorrectlyRoundedDivSqrt::runOnModule(Module &M) {
  // The CR flag the module itself was compiled with.
  m_IsCorrectlyRounded = getAnalysis<MetaDataUtilsWrapper>().getModuleMetaData()->compOpt.CorrectlyRoundedDivSqrt;

  // CR may also have been requested through a build-time option rather than
  // the module metadata (relevant at least for SPIR).
  const bool crRequested = m_IsCorrectlyRounded || m_forceCR;
  if (!crRequested)
    return false;

  m_changed = false;
  m_module = &M;

  for (Function &Fn : M) {
    if (!Fn.isDeclaration()) {
      visit(Fn);
      continue;
    }

    // Declarations are only rewritten when the half type is not in play.
    if (m_hasHalfTy)
      continue;

    const bool renamed = processDeclaration(Fn);
    m_changed = m_changed || renamed;
  }

  // The module pointer is only valid for the duration of this run.
  m_module = nullptr;
  return m_changed;
}

// Renames a sqrt builtin declaration to its "_cr" counterpart.
//
// Two mangled-name prefixes are recognised. In both, the decimal length that
// precedes the identifier is bumped by three (one digit is overwritten) and
// "_cr" is spliced in directly after the identifier, leaving whatever follows
// the prefix untouched:
//   "_Z4sqrt..."              -> "_Z7sqrt_cr..."
//   "_Z16__spirv_ocl_sqrt..." -> "_Z19__spirv_ocl_sqrt_cr..."
//
// Returns true if F was renamed, false if its name matches neither prefix.
bool CorrectlyRoundedDivSqrt::processDeclaration(Function &F) {
  struct RenameRule {
    const char *prefix;  // mangled prefix that selects this rule
    unsigned digitPos;   // index of the length digit to overwrite
    char newDigit;       // replacement digit (old length + 3)
    unsigned suffixPos;  // index at which "_cr" is inserted (== prefix length)
  };
  static const RenameRule kRules[] = {
      {"_Z4sqrt", 2, '7', 7},
      {"_Z16__spirv_ocl_sqrt", 3, '9', 20},
  };

  const StringRef currentName = F.getName();
  for (const RenameRule &rule : kRules) {
    if (!currentName.startswith(rule.prefix))
      continue;

    std::string crName = currentName.str();
    crName[rule.digitPos] = rule.newDigit;
    crName.insert(rule.suffixPos, "_cr");
    F.setName(crName);
    return true;
  }

  // not sqrt function
  return false;
}

// Emits a correctly rounded division of Op0 by Op1 immediately before I.
//
// The division is expressed as call(s) to the scalar builtin
// "__builtin_spirv_divide_cr_f32_f32", which is declared in the current module
// on first use with the signature (scalar, scalar) -> scalar, where "scalar"
// is the scalar type of Op0. The builtin's name is fixed to the f32 variant,
// so callers are expected to pass float or vector-of-float operands.
//
//   I   - instruction used as the insertion point for the new IR.
//   Op0 - dividend (scalar or fixed-length vector).
//   Op1 - divisor, same type as Op0.
//
// Returns the value holding the quotient: a single call for scalar operands,
// or a vector rebuilt element by element from per-lane calls for vectors.
// Must only be called while m_module is set (i.e. from within runOnModule()).
Value *CorrectlyRoundedDivSqrt::emitIEEEDivide(BinaryOperator *I, Value *Op0, Value *Op1) {
  Type *Ty = Op0->getType();
  Type *ScalarTy = Ty->getScalarType();
  IRBuilder<> IRB(I);

  SmallVector<Type *, 2> ArgsTypes{ScalarTy, ScalarTy};
  auto FT = FunctionType::get(ScalarTy, ArgsTypes, false);
  auto IEEEDivide = m_module->getOrInsertFunction("__builtin_spirv_divide_cr_f32_f32", FT);

  if (!isa<VectorType>(Ty)) {
    Value *Args[] = {Op0, Op1};
    return IRB.CreateCall(IEEEDivide, Args);
  }

  // The type is known to be a vector here, so use cast<> rather than
  // dyn_cast<>: an unexpected (non-fixed) vector type trips an assertion
  // instead of yielding a null pointer that is then dereferenced.
  auto *vType = cast<IGCLLVM::FixedVectorType>(Ty);
  const unsigned VecLen = static_cast<unsigned>(vType->getNumElements());

  // Scalarize: divide lane by lane and reassemble the result vector.
  Value *Divide = UndefValue::get(Ty);
  for (unsigned i = 0; i < VecLen; i++) {
    auto *SOp0 = IRB.CreateExtractElement(Op0, i);
    auto *SOp1 = IRB.CreateExtractElement(Op1, i);
    Value *Args[] = {SOp0, SOp1};
    auto *ScalarDivide = IRB.CreateCall(IEEEDivide, Args);
    Divide = IRB.CreateInsertElement(Divide, ScalarDivide, i);
  }

  return Divide;
}

// Visitor hook for fdiv instructions.
//
// When the result's scalar type is float (single precision, scalar or
// vector), the instruction is replaced by the value produced by
// emitIEEEDivide() and then erased; m_changed is set to record the edit.
// fdiv instructions of any other type are left as they are.
void CorrectlyRoundedDivSqrt::visitFDiv(BinaryOperator &I) {
  const bool isSinglePrecision = I.getType()->getScalarType()->isFloatTy();
  if (!isSinglePrecision)
    return;

  Value *dividend = I.getOperand(0);
  Value *divisor = I.getOperand(1);
  Value *crQuotient = emitIEEEDivide(&I, dividend, divisor);

  // Redirect all users to the new value before removing the old instruction.
  I.replaceAllUsesWith(crQuotient);
  I.eraseFromParent();
  m_changed = true;
}