File: HWConformity.h

package info (click to toggle)
intel-graphics-compiler2 2.28.4-3
  • links: PTS, VCS
  • area: main
  • in suites:
  • size: 792,656 kB
  • sloc: cpp: 5,761,745; ansic: 466,928; lisp: 312,143; python: 114,790; asm: 44,736; pascal: 10,930; sh: 8,033; perl: 7,914; ml: 3,625; awk: 3,523; yacc: 2,747; javascript: 2,667; lex: 1,898; f90: 1,028; cs: 573; xml: 474; makefile: 344; objc: 162
file content (275 lines) | stat: -rw-r--r-- 11,453 bytes parent folder | download | duplicates (4)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
/*========================== begin_copyright_notice ============================

Copyright (C) 2017-2025 Intel Corporation

SPDX-License-Identifier: MIT

============================= end_copyright_notice ===========================*/

#ifndef _HWCONFORMITY_H_
#define _HWCONFORMITY_H_

#include "BuildIR.h"
#include "Common_ISA_util.h"
#include "FlowGraph.h"
#include "G4_IR.hpp"

#include <map>

namespace vISA {
class DPASSrc2RSCache {
public:
  std::vector<G4_Declare *> GRFCache;
  unsigned latestID;
  bool firstDpas;

  DPASSrc2RSCache() {
    latestID = 0;
    firstDpas = true;
    GRFCache.resize(16, nullptr);
  }
  ~DPASSrc2RSCache() {}
};

class PointsToAnalysis;
class HWConformity {
  IR_Builder &builder;
  G4_Kernel &kernel;

  // this must be set before calling the individual fix functions
  G4_BB *curBB = nullptr;

  // This is added for data layout optimization.
  // Currently it only targets packed-byte pattern.
  // Can be extended later for other patterns.
  enum AccessPattern {
    ACCESS_PATTERN_UNDEF = 0,
    ACCESS_PATTERN_PACKED_BYTE = 1,
    ACCESS_PATTERN_INVALID = 2
  };
  std::map<G4_Declare *, AccessPattern> dclAccessPattern;

  AccessPattern getAccessPattern(G4_Declare *dcl) {
    auto iter = dclAccessPattern.find(dcl);
    if (iter == dclAccessPattern.end()) {
      return ACCESS_PATTERN_UNDEF;
    }
    return (*iter).second;
  }
  void setAccessPattern(G4_Declare *dcl, AccessPattern ap) {
    dclAccessPattern[dcl] = ap;
  }
  bool markPackedByteReference(G4_Kernel &kernel, G4_Operand *opnd,
                               G4_INST *inst);
  G4_Operand *fixPackedByteReference(IR_Builder &builder, G4_Operand *opnd);

  // helper functions
  bool hasBadRegion(G4_INST *inst);
  bool canSplitInst(G4_INST *inst, G4_INST *use_op);
  bool splitInstListForByteDst(INST_LIST_ITER it, G4_BB *bb,
                               uint16_t extypesize);

  G4_SrcRegRegion *insertCopyBefore(INST_LIST_ITER it, uint32_t srcNum,
                                    G4_SubReg_Align tmpAlign, G4_BB *bb);
  G4_SrcRegRegion *insertCopyAtBBEntry(G4_BB *bb, G4_ExecSize newExecSize,
                                       G4_Operand *src);
  void broadcast(G4_BB *bb, INST_LIST_ITER it, int srcPos,
                 G4_SubReg_Align subAlign);

  G4_INST *splitInstWithByteDst(G4_INST *expand_op);
  G4_SubReg_Align getDclAlignment(int opndBytes, G4_INST *inst, bool isScalar);

  // HW conformity check functions
  void fixPackedSource(INST_LIST_ITER it, G4_BB *bb);
  bool fixMathInst(INST_LIST_ITER it, G4_BB *bb);
  bool fixMULInst(INST_LIST_ITER &it, G4_BB *bb);
  void fixMULHInst(INST_LIST_ITER &i, G4_BB *bb);
  void fixOpnds(INST_LIST_ITER it, G4_BB *bb, G4_Type &exType);
  bool fixLine(INST_LIST_ITER it, G4_BB *bb);
  bool fixOpndType(INST_LIST_ITER it, G4_BB *bb);
  void fixPackedHFConversions(INST_LIST_ITER it, G4_BB *bb);

  bool fixIndirectMoviSimd16ToSimd8(INST_LIST_ITER i, G4_BB *bb);
  bool fixIndirectOpnd(INST_LIST_ITER i, G4_BB *bb);
  bool fixIndirectSrcForCompressedInst(INST_LIST_ITER i, G4_BB *bb);
  void fix3SrcInst(INST_LIST_ITER i, G4_BB *bb);
  void fixAlign13SrcInst(INST_LIST_ITER i, G4_BB *bb);
  void fixCompareInst(INST_LIST_ITER i, G4_BB *bb, G4_Type exType,
                      int dst_elsize);
  bool fixDstAlignment(INST_LIST_ITER i, G4_BB *bb, G4_Type extype,
                       unsigned int dst_elsize);
  void fixPredicateIndirectInst(INST_LIST_ITER i, G4_BB *bb);
  bool fixDstAlignmentWithVectorImm(INST_LIST_ITER i, G4_BB *bb);
  bool fixAcc(INST_LIST_ITER i, G4_BB *bb);
  void fixDstHstride(INST_LIST_ITER i, int extypesize);
  void fixMADInst(G4_BB *bb);
  void fixBfnInst(G4_BB *bb);
  void fixSrcRegion(G4_INST *inst);
  bool fixOddAlignSrc1Region(INST_LIST_ITER i, G4_BB *bb);
  void conformBB(G4_BB *bb);
  void fixSADA2Inst(G4_BB *bb);
  void fixMixedHFInst(G4_BB *bb);
  void fixSendInst(G4_BB *bb);
  void fixsrc1src2Overlap(G4_BB *bb);
  bool canSplitByteDst(G4_opcode op);
  bool fixInstOpndTypeAlign(INST_LIST_ITER i, G4_BB *bb);
  void fixOpndTypeAlign(G4_BB *bb);
  void fixInstExecSize(G4_BB *bb);
  bool reduceExecSize(INST_LIST_ITER iter, G4_BB *bb);
  bool reduceExecSizeForMath(INST_LIST_ITER iter, G4_BB *bb);
  bool checkSrcDstOverlap(INST_LIST_ITER iter, G4_BB *bb, bool compOpt);
  void splitInstruction(INST_LIST_ITER iter, G4_BB *bb, bool compOpt,
                        uint8_t numInFirstMov, bool rule4_11,
                        bool allowSrcCrossGRF);
  void splitSIMD32Inst(INST_LIST_ITER iter, G4_BB *bb);
  void moveSrcToGRF(INST_LIST_ITER it, uint32_t srcNum, uint16_t numGRF,
                    G4_BB *bb);
  void saveDst(INST_LIST_ITER &it, uint8_t stride, G4_BB *bb);
  void restoreDst(INST_LIST_ITER &it, G4_DstRegRegion *origDst, G4_BB *bb);
  void insertMovAfter(INST_LIST_ITER &it, uint16_t stride, G4_BB *bb);
  void removeBadSrc(INST_LIST_ITER &it, G4_BB *bb, bool crossGRFDst,
                    bool oneGRFSrc[3], bool badTwoGRFSrc[3]);
  uint8_t checkMinExecSize(G4_opcode op);
  void convertMAD2MulAdd(INST_LIST_ITER iter, G4_BB *bb);
  void maintainDU4TempMov(G4_INST *inst, G4_INST *movInst);
  void fixImm64(INST_LIST_ITER i, G4_BB *bb);
  bool checkSrcCrossGRF(INST_LIST_ITER &i, G4_BB *bb);
  G4_INST *checkSrcDefInst(G4_INST *inst, G4_INST *def_inst, uint32_t srcNum);
  bool emulate64bMov(INST_LIST_ITER iter, G4_BB *bb);
  bool fix64bInst(INST_LIST_ITER i, G4_BB *bb);
  bool fixPlaneInst(INST_LIST_ITER i, G4_BB *bb);
  void expandPlaneInst(INST_LIST_ITER i, G4_BB *bb);
  bool fixAddcSubb(G4_BB *bb);
  void fixDataLayout();
  void fixBFMixedMode();
  bool fixMov(INST_LIST_ITER i, G4_BB *bb);
  bool fixRotate(INST_LIST_ITER i, G4_BB *bb);
  bool fixIntToHFMove(G4_BB *bb);
  void fixEvEURegionRestrictions(G4_BB *bb);

  bool isFloatOr64b(G4_INST *inst);
  uint16_t getSrcStride(G4_SrcRegRegion *src);
  bool fixBFMove(INST_LIST_ITER i, G4_BB *bb);
  void fixUnalignedRegions(INST_LIST_ITER it, G4_BB *bb);
  bool fixFcvt(INST_LIST_ITER i, G4_BB *bb);
  void fixByteXBarRestriction(INST_LIST_ITER it, G4_BB *bb);
  void fixDPAS(INST_LIST_ITER it, G4_BB *bb);
  bool fixSrnd(INST_LIST_ITER i, G4_BB *bb);
  void fixShiftInsts(INST_LIST_ITER i, G4_BB *bb);
  void split64bCopyToSIMD1Insts(INST_LIST_ITER it, G4_BB *bb);

  void helperGenerateTempDst(G4_BB *bb, INST_LIST_ITER instIter, G4_INST *inst,
                             uint8_t hStride, G4_Type tempDstType,
                             G4_SubReg_Align subAlign = Any);

  bool isGoodAlign16Src(G4_INST *inst, int srcPos);
  bool isGoodAlign1TernarySrc(G4_INST *inst, int srcPos, bool canBeImm);
  bool isGoodAlign1TernaryDst(G4_INST *inst) const;
  void copyDwords(G4_Declare *dst, int dstOffset, G4_Declare *src,
                  int srcOffset, int numDwords, G4_BB *bb, INST_LIST_ITER iter);

  void copyDwordsIndirect(G4_Declare *dst, G4_SrcRegRegion *src, int numDwords,
                          G4_BB *bb, INST_LIST_ITER iter);

  void copyRegs(G4_Declare *dst, int dstOffset, G4_Declare *src, int srcOffset,
                int numRegs, G4_BB *bb, INST_LIST_ITER iter);

  bool isFpMadPreferred(G4_BB *bb, INST_LIST_ITER iter);
  bool generateFPMad(G4_BB *bb, INST_LIST_ITER iter);
  bool generateAlign1Mad(G4_BB *bb, INST_LIST_ITER iter);
  bool hasSameSubregOffset(G4_INST *inst) const;
  bool hasSameSubregOffset(G4_INST *inst, uint32_t &byteOffset) const;

  bool hasSameSubregOffsetSrnd(G4_DstRegRegion *dst,
                                         G4_SrcRegRegion *src) const;
  void fixImmAndARFSrc(INST_LIST_ITER it, G4_BB *bb);
  void generateMacl(INST_LIST_ITER it, G4_BB *bb);
  void doGenerateMacl(INST_LIST_ITER it, G4_BB *bb);

  void fixSelCsel(INST_LIST_ITER it, G4_BB *bb);

  void avoidDstSrcOverlap(PointsToAnalysis &p);
  void avoidInstDstSrcOverlap(INST_LIST_ITER it, G4_BB *bb,
                              PointsToAnalysis &p);

  void replaceHFBFwithFloat(INST_LIST_ITER it, G4_BB *bb);

  bool hasDPASSourceTwoReuse(DPASSrc2RSCache *src2GRFCache, G4_INST *inst);

  bool checkSrcMod(INST_LIST_ITER it, G4_BB *bb, int srcPos);

  void fixSrc2(INST_LIST_ITER it, G4_BB *bb, bool swapSrc0and2);

  void fixVxHFloat64b(INST_LIST_ITER it, G4_BB *bb);

  void fixPredCtrl(INST_LIST_ITER it, G4_BB *bb);

  // Calla src register must be grf aligned (sub-reg offset must be 0)
  void fixCalla(INST_LIST_ITER it, G4_BB *bb);

  // If alignment and region of all operands of any instruction are conformed
  // by a dedicated function, return true.
  // This is used to skip generic conformity functions, such as
  // fixOpndTypeAlign().
  bool hasDedicateAlignRegionConformity(INST_LIST_ITER it) const {
    return hasDedicateAlignRegionConformity(*it);
  }
  bool hasDedicateAlignRegionConformity(const G4_INST *I) const;

  void fixSrc1Region(INST_LIST_ITER it, G4_BB *bb);

  INST_LIST_ITER fixMadwInst(INST_LIST_ITER i, G4_BB *bb);

  void fixFloatARFDst(INST_LIST_ITER it, G4_BB *bb);

  void fixImmAddrOffsetOOB(INST_LIST_ITER it, G4_BB *bb);

protected:
  G4_DstRegRegion *insertMovAfter(INST_LIST_ITER &it, G4_DstRegRegion *dst,
                                  G4_Type type, G4_BB *bb,
                                  G4_SubReg_Align dstAlign = Any);
  G4_DstRegRegion *insertMovAfter(INST_LIST_ITER &it, G4_DstRegRegion *dst,
                                  G4_Type type, G4_BB *bb, uint16_t stride,
                                  G4_SubReg_Align dstAlign = Any);
  G4_Operand *insertMovBefore(INST_LIST_ITER it, uint32_t srcNum, G4_Type type,
                              G4_BB *bb, G4_SubReg_Align tmpAlign = Any);
  G4_Operand *insertMovBefore(INST_LIST_ITER it, uint32_t srcNum, G4_Type type,
                              G4_BB *bb, uint16_t stride,
                              G4_SubReg_Align tmpAlign = Any);

  // replace src <srcNum> for inst <*it> with a temp variable of type <type>
  // This is used to satisfy various HW restrictions on src
  // type/alignment/region/modifier/etc.
  void replaceSrc(INST_LIST_ITER it, uint32_t srcNum, G4_Type type, G4_BB *bb,
                  G4_SubReg_Align tmpAlign = Any) {
    G4_INST *inst = *it;
    inst->setSrc(insertMovBefore(it, srcNum, type, bb, tmpAlign), srcNum);
  }
  // replace dst for inst <*it> with a temp variable of type <type>
  // the original dst is now the dst of a new move instruction from the temp
  // variable. This is used to satisfy various HW restrictions on dst
  // type/alignment/etc.
  void replaceDst(INST_LIST_ITER it, G4_Type type,
                  G4_SubReg_Align dstAlign = Any) {
    G4_INST *inst = *it;
    inst->setDest(insertMovAfter(it, inst->getDst(), type, curBB, dstAlign));
  }

public:
  HWConformity(IR_Builder &b, G4_Kernel &k) : builder(b), kernel(k) {}
  void chkHWConformity();
  static void tryEliminateMadSrcModifier(IR_Builder &builder, G4_INST *inst);
  bool checkDPASSrcDstOverlap(INST_LIST_ITER iter, G4_BB *bb);
  G4_INST *evenlySplitDPAS8x8Inst(INST_LIST_ITER iter, G4_BB *bb);
  void DPASWA(G4_BB *bb, DPASSrc2RSCache *src2GRFCache);
  void localizeForAcc(G4_BB *bb);
  void splitDWMULInst(INST_LIST_ITER &start, INST_LIST_ITER &end, G4_BB *bb);
  void fixMulSrc1(INST_LIST_ITER i, G4_BB *bb);
  bool evenlySplitInst(INST_LIST_ITER iter, G4_BB *bb,
                       bool checkOverlap = true);
};
} // namespace vISA
// single entry point for HW conformity checks
extern void HWConformityChk(vISA::IR_Builder &builder, vISA::G4_Kernel &kernel);

#endif /* _HWCONFORMITY_H_ */