File: GenericShaderState.cpp

package info (click to toggle)
intel-graphics-compiler2 2.16.0-2
  • links: PTS, VCS
  • area: main
  • in suites: sid
  • size: 106,644 kB
  • sloc: cpp: 805,640; lisp: 287,672; ansic: 16,414; python: 3,952; yacc: 2,588; lex: 1,666; pascal: 313; sh: 186; makefile: 35
file content (124 lines) | stat: -rw-r--r-- 5,461 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
/*========================== begin_copyright_notice ============================

Copyright (C) 2024 Intel Corporation

SPDX-License-Identifier: MIT

============================= end_copyright_notice ===========================*/

#include "common/LLVMWarningsPush.hpp"
#include <llvm/IR/Function.h>
#include "common/LLVMWarningsPop.hpp"
#include "Compiler/CISACodeGen/GenericShaderState.hpp"

using namespace llvm;

namespace IGC {
// Binds the shader state to its entry function and code-gen context, and
// caches the size of the entry function (its basic-block count) in m_numBlocks.
GenericShaderState::GenericShaderState(const llvm::Function &Entry, CodeGenContext &Ctx)
    : Entry(Entry), Ctx(Ctx) {
  // Inside the body `Entry` names the constructor parameter, not the member.
  m_numBlocks = Entry.size();
}

// Maps a raw sampler count to the number of 4-sampler groups in use:
//   0        -> 0
//   1 .. 4   -> 1
//   5 .. 8   -> 2
//   9 .. 12  -> 3
//   13 .. 16 -> 4
//   > 16     -> 0 (out of range; forced to 0 to avoid undefined behavior)
unsigned int GenericShaderState::GetSamplerCount(unsigned int samplerCount) const {
  constexpr unsigned int kSamplersPerGroup = 4;
  constexpr unsigned int kMaxSamplers = 16;

  // No samplers, or a count outside the supported range, both yield 0.
  if (samplerCount == 0 || samplerCount > kMaxSamplers) {
    return 0;
  }

  // Round up to the next whole group of four.
  return (samplerCount + kSamplersPerGroup - 1) / kSamplersPerGroup;
}

void GenericShaderState::CreateGatherMap() {
  auto &pushInfo = Ctx.getModuleMetaData()->pushInfo;
  int index = -1;
  gatherMap.reserve(pushInfo.constants.size());
  for (auto I = pushInfo.constants.begin(), E = pushInfo.constants.end(); I != E; I++) {
    unsigned int address = (I->first.bufId * 256 * 4) + (I->first.eltId);
    unsigned int cstOffset = address / 4;
    unsigned int cstChannel = address % 4;
    if (cstOffset != index) {
      USC::SConstantGatherEntry entry;
      entry.GatherEntry.Fields.constantBufferOffset = cstOffset % 256;
      entry.GatherEntry.Fields.channelMask = BIT(cstChannel);
      // with 3DSTATE_DX9_CONSTANT if buffer is more than 4Kb,
      //  the constant after 255 can be accessed in constant buffer 1
      int CBIndex = cstOffset / 256;
      entry.GatherEntry.Fields.constantBufferIndex = CBIndex;
      m_constantBufferMask |= BIT(CBIndex);
      gatherMap.push_back(entry);
      index = cstOffset;
    } else {
      gatherMap[gatherMap.size() - 1].GatherEntry.Fields.channelMask |= BIT(cstChannel);
    }
  }

  // The size of the gather map must be even
  if (gatherMap.size() % 2 != 0) {
    USC::SConstantGatherEntry entry;
    entry.GatherEntry.Value = 0;
    gatherMap.push_back(entry);
  }
}

// Copies the constant-buffer related state of this shader into pKernelProgram:
//  - the constant buffer mask and a freshly allocated copy of gatherMap,
//  - the stateless constant-buffer slot information (when m_cbSlot is set),
//  - the simple-push descriptors from the module metadata,
//  - the constant-buffer replacement data from the context (when present).
//
// pKernelProgram->gatherMap is allocated here with new char[]; this function
// does not free it.
void GenericShaderState::CreateConstantBufferOutput(SKernelProgram *pKernelProgram) {
  pKernelProgram->ConstantBufferMask = m_constantBufferMask;

  // Gather map: only allocated and copied when there is at least one entry.
  pKernelProgram->gatherMapSize = gatherMap.size();
  if (pKernelProgram->gatherMapSize > 0) {
    const auto dstBytes = pKernelProgram->gatherMapSize * sizeof(USC::SConstantGatherEntry);
    const auto srcBytes = gatherMap.size() * sizeof(USC::SConstantGatherEntry);
    pKernelProgram->gatherMap = new char[dstBytes];
    memcpy_s(pKernelProgram->gatherMap, dstBytes, gatherMap.data(), srcBytes);
    pKernelProgram->ConstantBufferLength = m_ConstantBufferLength / getMinPushConstantBufferAlignmentInBytes();
  }

  // -1 marks "no constant buffer slot"; in that case the outputs are left untouched.
  if (m_cbSlot != -1) {
    pKernelProgram->bufferSlot = m_cbSlot;
    pKernelProgram->statelessCBPushedSize = m_statelessCBPushedSize;
  }

  // for simple push
  auto &pushInfo = Ctx.getModuleMetaData()->pushInfo;
  for (unsigned int slot = 0; slot < pushInfo.simplePushBufferUsed; ++slot) {
    const auto &src = pushInfo.simplePushInfoArr[slot];
    auto &dst = pKernelProgram->simplePushInfoArr[slot];
    dst.m_cbIdx = src.cbIdx;
    dst.m_pushableAddressGrfOffset = src.pushableAddressGrfOffset;
    dst.m_pushableOffsetGrfOffset = src.pushableOffsetGrfOffset;
    dst.m_offset = src.offset;
    dst.m_size = src.size;
    dst.isStateless = src.isStateless;
    dst.isBindless = src.isBindless;
  }

  // Constant-buffer replacement data is forwarded only when patterns are set.
  const auto &context = GetContext();
  if (context.m_ConstantBufferReplaceShaderPatterns) {
    pKernelProgram->m_ConstantBufferReplaceShaderPatterns = context.m_ConstantBufferReplaceShaderPatterns;
    pKernelProgram->m_ConstantBufferReplaceShaderPatternsSize = context.m_ConstantBufferReplaceShaderPatternsSize;
    pKernelProgram->m_ConstantBufferUsageMask = context.m_ConstantBufferUsageMask;
    pKernelProgram->m_ConstantBufferReplaceSize = context.m_ConstantBufferReplaceSize;
  }
}

// Initializes the scratch-space configuration of Prog from the given context.
// The values handed to Prog.init() are, in order:
//  - whether the platform lacks a scratch surface,
//  - the platform's maximum per-thread scratch space,
//  - the UseScratchSpacePrivateMemory compile option from the module metadata,
//  - the result of SeparateSpillAndScratch() for this context,
//  - whether the separate-scratch workaround is active (the
//    EnableSeparateScratchWA flag is on and the module has not disabled it).
void GenericShaderState::setScratchUsage(CodeGenContext &Ctx, SProgramOutput &Prog) {
  const bool separateSpillPvt = SeparateSpillAndScratch(&Ctx);
  const bool separateScratchWA =
      IGC_IS_FLAG_ENABLED(EnableSeparateScratchWA) && !Ctx.getModuleMetaData()->disableSeparateScratchWA;

  const bool noScratchSurface = !Ctx.platform.hasScratchSurface();
  const auto maxScratchPerThread = Ctx.platform.maxPerThreadScratchSpace();
  const auto usePrivateMemory = Ctx.getModuleMetaData()->compOpt.UseScratchSpacePrivateMemory;

  Prog.init(noScratchSurface, maxScratchPerThread, usePrivateMemory, separateSpillPvt, separateScratchWA);
}

// Convenience overload: configures Prog using the context this state was built with.
void GenericShaderState::setScratchUsage(SProgramOutput &Prog) {
  setScratchUsage(Ctx, Prog);
}

// Returns the per-thread GRF count of the context divided by
// CodeGenContext::DEFAULT_TOTAL_GRF_NUM, clamped to a minimum of 1.
uint32_t GenericShaderState::GetShaderThreadUsageRate() {
  const uint32_t grfPerThread = GetContext().getNumGRFPerThread();
  const uint32_t usageRate = grfPerThread / CodeGenContext::DEFAULT_TOTAL_GRF_NUM;
  // Never return 0, so that code dividing by this rate cannot divide by zero.
  return usageRate == 0 ? 1 : usageRate;
}
} // namespace IGC