File: SpillCleanup.h

package info (click to toggle)
intel-graphics-compiler2 2.28.4-4
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 792,744 kB
  • sloc: cpp: 5,761,745; ansic: 466,928; lisp: 312,143; python: 114,790; asm: 44,736; pascal: 10,930; sh: 8,033; perl: 7,914; ml: 3,625; awk: 3,523; yacc: 2,747; javascript: 2,667; lex: 1,898; f90: 1,028; cs: 573; xml: 474; makefile: 344; objc: 162
file content (160 lines) | stat: -rw-r--r-- 6,341 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
/*========================== begin_copyright_notice ============================

Copyright (C) 2017-2021 Intel Corporation

SPDX-License-Identifier: MIT

============================= end_copyright_notice ===========================*/

#ifndef __SPILLCLEANUP_H__
#define __SPILLCLEANUP_H__

#include "FlowGraph.h"
#include "G4_IR.hpp"
#include "RPE.h"

namespace vISA {
// Spill/fill coalescing and cleanup pass.
//
// Only the interface is declared here: apart from the constructor,
// isSpillCleanupEnabled() and getScratchMsgInfo(), every member function is
// defined out of line. Construct the object with the analyses it works on and
// invoke run().
class CoalesceSpillFills {
private:
  // Analyses and helpers borrowed from the caller. They are held by reference,
  // so they must outlive this object.
  G4_Kernel &kernel;
  LivenessAnalysis &liveness;
  GraphColor &graphColor;
  GlobalRA &gra;
  SpillManagerGRF &spill;
  // Iteration number passed in by the caller (presumably the RA iteration this
  // cleanup runs in -- TODO confirm against the call site).
  unsigned int iterNo;
  // Store declares spilled by sends like sampler
  std::set<G4_Declare *> sendDstDcl;
  RPE &rpe;
  // Spill cleanup is a per-BB optimization. Store current BB being optimized.
  G4_BB *curBB = nullptr;
  // True when the kernel's ATTR_Target attribute is VISA_CM (set in the ctor).
  bool isCm = false;

  // Set window size to coalesce
  const unsigned int cWindowSize = 10;
  const unsigned int cMaxWindowSize = 20;
  const unsigned int cMaxFillPayloadSize = 4;
  const unsigned int cMaxSpillPayloadSize = 4;
  // Baseline tuning values. The constructor passes each of the constants below
  // through kernel.getScaledGRFSize() to derive the run-time thresholds that
  // follow.
  const unsigned int cSpillFillCleanupWindowSize = 10;
  const unsigned int cFillWindowThreshold128GRF = 180;
  const unsigned int cSpillWindowThreshold128GRF = 120;
  const unsigned int cHighRegPressureForCleanup = 100;
  const unsigned int cHighRegPressureForWindow = 70;
  const unsigned int cInputSizeLimit = 70;

  // Run-time thresholds; all of them are computed in the constructor.
  unsigned int fillWindowSizeThreshold = 0;
  unsigned int spillWindowSizeThreshold = 0;
  unsigned int highRegPressureForCleanup = 0;
  unsigned int highRegPressureForWindow = 0;
  unsigned int inputSizeLimit = 0;
  // Scaled cleanup window, capped at cSpillFillCleanupWindowSize.
  unsigned int spillFillCleanupWindowSize = 0;
  // Sum of the sizes of all kernel inputs divided by the number of Type_UB
  // elements per GRF.
  unsigned int totalInputSize = 0;

  // Debug flags: inclusive range of BB ids on which cleanup is allowed. Both
  // are overwritten in the constructor from the vISA_SpillCleanupStartBBID /
  // vISA_SpillCleanupEndBBID options.
  unsigned int spillCleanupStartBBId = 0;
  unsigned int spillCleanupEndBBId = 0xffffffff;

  // Returns true when the id of `bb` lies inside the inclusive
  // [spillCleanupStartBBId, spillCleanupEndBBId] debug range.
  bool isSpillCleanupEnabled(const G4_BB *bb) const {
    const auto bbId = bb->getId();
    return (bbId >= spillCleanupStartBBId && bbId <= spillCleanupEndBBId);
  }

  // <Old fill declare*, std::pair<Coalesced Decl*, Row Off>>
  // This data structure is used to replace old spill/fill operands
  // with coalesced operands with correct offset.
  std::map<G4_Declare *, std::pair<G4_Declare *, unsigned int>> replaceMap;

  bool replaceCoalescedOperands(G4_INST *);

  void dumpKernel();
  void dumpKernel(unsigned int v1, unsigned int v2);

  // Out-of-line helpers of the pass. Their exact contracts are not visible in
  // this header; consult the definitions before relying on them.
  bool notOOB(unsigned int min, unsigned int max);
  void sendsInRange(std::list<INST_LIST_ITER> &, std::list<INST_LIST_ITER> &,
                    unsigned int, unsigned int &, unsigned int &);
  void keepConsecutiveSpills(std::list<INST_LIST_ITER> &,
                             std::list<INST_LIST_ITER> &, unsigned int,
                             unsigned int &, unsigned int &, bool &,
                             G4_InstOption &);
  void fills();
  void spills();
  INST_LIST_ITER analyzeFillCoalescing(std::list<INST_LIST_ITER> &,
                                       INST_LIST_ITER, INST_LIST_ITER);
  INST_LIST_ITER analyzeSpillCoalescing(std::list<INST_LIST_ITER> &,
                                        INST_LIST_ITER, INST_LIST_ITER);
  void removeWARFills(std::list<INST_LIST_ITER> &, std::list<INST_LIST_ITER> &);
  void coalesceFills(std::list<INST_LIST_ITER> &, unsigned int, unsigned int);
  G4_INST *generateCoalescedFill(G4_SrcRegRegion *, unsigned int, unsigned int,
                                 unsigned int, bool);
  G4_SrcRegRegion *generateCoalescedSpill(G4_SrcRegRegion *, unsigned int,
                                          unsigned int, bool, G4_InstOption,
                                          G4_Declare *, unsigned int);
  bool fillHeuristic(std::list<INST_LIST_ITER> &, std::list<INST_LIST_ITER> &,
                     const std::list<INST_LIST_ITER> &, unsigned int &,
                     unsigned int &);
  bool overlap(G4_INST *, std::list<INST_LIST_ITER> &);
  bool overlap(G4_INST *, G4_INST *, bool &);
  void coalesceSpills(std::list<INST_LIST_ITER> &, unsigned int, unsigned int,
                      bool, G4_InstOption);
  bool allSpillsSameVar(std::list<INST_LIST_ITER> &);
  void fixSendsSrcOverlap();
  void removeRedundantSplitMovs();
  G4_Declare *createCoalescedSpillDcl(unsigned int);
  void populateSendDstDcl();
  void spillFillCleanup();
  void removeRedundantWrites();
  // For Cm, if BB is in divergent CF and spill inst and fill inst have
  // mismatched WriteEnable bit then return true as cleanup may be
  // illegal.
  bool isIncompatibleEMCm(G4_INST *inst1, G4_INST *inst2) const;

public:
  // Binds the pass to the kernel and the analyses it consumes, then derives
  // every run-time threshold from the baseline constants.
  //
  //   k           - kernel to optimize
  //   l, g, s, gr - liveness, graph coloring, spill manager and global RA
  //                 objects, stored by reference
  //   iterationNo - stored in iterNo
  //   r           - register pressure estimator, stored by reference
  CoalesceSpillFills(G4_Kernel &k, LivenessAnalysis &l, GraphColor &g,
                     SpillManagerGRF &s, unsigned int iterationNo, RPE &r,
                     GlobalRA &gr)
      : kernel(k), liveness(l), graphColor(g), gra(gr), spill(s),
        iterNo(iterationNo), rpe(r) {
    // `kernel` is bound to `k`; the member is used consistently from here on.
    fillWindowSizeThreshold =
        kernel.getScaledGRFSize(cFillWindowThreshold128GRF);
    spillWindowSizeThreshold =
        kernel.getScaledGRFSize(cSpillWindowThreshold128GRF);
    highRegPressureForCleanup =
        kernel.getScaledGRFSize(cHighRegPressureForCleanup);
    highRegPressureForWindow =
        kernel.getScaledGRFSize(cHighRegPressureForWindow);
    inputSizeLimit = kernel.getScaledGRFSize(cInputSizeLimit);
    // Scaling may only shrink the cleanup window, never grow it past the
    // baseline value.
    spillFillCleanupWindowSize = std::min<unsigned int>(
        kernel.getScaledGRFSize(cSpillFillCleanupWindowSize),
        cSpillFillCleanupWindowSize);

    // Accumulate the size of every kernel input, then convert the sum to GRF
    // units (integer division, so a partial GRF is dropped).
    auto &inputs = kernel.fg.builder->m_inputVect;
    for (const input_info_t *input_info : inputs) {
      totalInputSize += input_info->size;
    }
    totalInputSize = totalInputSize / kernel.numEltPerGRF<Type_UB>();

    isCm = (kernel.getInt32KernelAttr(Attributes::ATTR_Target) == VISA_CM);

    spillCleanupStartBBId =
        kernel.getOptions()->getuInt32Option(vISA_SpillCleanupStartBBID);
    spillCleanupEndBBId =
        kernel.getOptions()->getuInt32Option(vISA_SpillCleanupEndBBID);
  }

  // Entry point of the pass (defined out of line).
  void run();

  // Extracts the scratch offset and the number of rows of a spill or fill
  // intrinsic.
  //
  //   inst          - instruction to query; expected to be a spill or a fill
  //                   intrinsic
  //   scratchOffset - out: getOffset() of the intrinsic
  //   size          - out: getNumRows() of the intrinsic
  //
  // Any other instruction kind trips the assert. Both outputs are then set to
  // 0 so that a caller never reads an indeterminate value should the assert be
  // compiled out (presumably the case in release builds -- TODO confirm).
  static void getScratchMsgInfo(G4_INST *inst, unsigned int &scratchOffset,
                                unsigned int &size) {
    if (inst->isSpillIntrinsic()) {
      scratchOffset = inst->asSpillIntrinsic()->getOffset();
      size = inst->asSpillIntrinsic()->getNumRows();
    } else if (inst->isFillIntrinsic()) {
      scratchOffset = inst->asFillIntrinsic()->getOffset();
      size = inst->asFillIntrinsic()->getNumRows();
    } else {
      vISA_ASSERT(false, "unknown inst type");
      scratchOffset = 0;
      size = 0;
    }
  }
};
} // namespace vISA

#endif