File: VPRecipeBuilder.h

package info (click to toggle)
llvm-toolchain-21 1%3A21.1.6-3
  • links: PTS, VCS
  • area: main
  • in suites: sid
  • size: 2,245,028 kB
  • sloc: cpp: 7,619,726; ansic: 1,434,018; asm: 1,058,748; python: 252,740; f90: 94,671; objc: 70,685; lisp: 42,813; pascal: 18,401; sh: 8,601; ml: 5,111; perl: 4,720; makefile: 3,675; awk: 3,523; javascript: 2,409; xml: 892; fortran: 770
file content (226 lines) | stat: -rw-r--r-- 9,333 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
//===- VPRecipeBuilder.h - Helper class to build recipes --------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_TRANSFORMS_VECTORIZE_VPRECIPEBUILDER_H
#define LLVM_TRANSFORMS_VECTORIZE_VPRECIPEBUILDER_H

#include "LoopVectorizationPlanner.h"
#include "VPlan.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"

namespace llvm {

class LoopVectorizationLegality;
class LoopVectorizationCostModel;
class TargetLibraryInfo;
class TargetTransformInfo;
struct HistogramInfo;
struct VFRange;

/// A chain of instructions that form a partial reduction.
/// Designed to match either:
///   reduction_bin_op (extend (A), accumulator), or
///   reduction_bin_op (bin_op (extend (A), (extend (B))), accumulator).
struct PartialReductionChain {
  PartialReductionChain(Instruction *Reduction, Instruction *ExtendA,
                        Instruction *ExtendB, Instruction *ExtendUser)
      : Reduction(Reduction), ExtendA(ExtendA), ExtendB(ExtendB),
        ExtendUser(ExtendUser) {}
  /// The top-level binary operation that forms the reduction to a scalar
  /// after the loop body.
  Instruction *Reduction;
  /// The extension of each of the inner binary operation's operands.
  Instruction *ExtendA;
  Instruction *ExtendB;

  /// The user of the extends that is then reduced.
  Instruction *ExtendUser;
};

/// Helper class to create VPRecipies from IR instructions.
class VPRecipeBuilder {
  /// The VPlan new recipes are added to.
  VPlan &Plan;

  /// The loop that we evaluate.
  Loop *OrigLoop;

  /// Target Library Info.
  const TargetLibraryInfo *TLI;

  // Target Transform Info.
  const TargetTransformInfo *TTI;

  /// The legality analysis.
  LoopVectorizationLegality *Legal;

  /// The profitablity analysis.
  LoopVectorizationCostModel &CM;

  PredicatedScalarEvolution &PSE;

  VPBuilder &Builder;

  /// The mask of each VPBB, generated earlier and used for predicating recipes
  /// in VPBB.
  /// TODO: remove by applying predication when generating the masks.
  DenseMap<VPBasicBlock *, VPValue *> &BlockMaskCache;

  // VPlan construction support: Hold a mapping from ingredients to
  // their recipe.
  DenseMap<Instruction *, VPRecipeBase *> Ingredient2Recipe;

  /// Cross-iteration reduction & first-order recurrence phis for which we need
  /// to add the incoming value from the backedge after all recipes have been
  /// created.
  SmallVector<VPHeaderPHIRecipe *, 4> PhisToFix;

  /// A mapping of partial reduction exit instructions to their scaling factor.
  DenseMap<const Instruction *, unsigned> ScaledReductionMap;

  /// Loop versioning instance for getting noalias metadata guaranteed by
  /// runtime checks.
  LoopVersioning *LVer;

  /// Check if \p I can be widened at the start of \p Range and possibly
  /// decrease the range such that the returned value holds for the entire \p
  /// Range. The function should not be called for memory instructions or calls.
  bool shouldWiden(Instruction *I, VFRange &Range) const;

  /// Check if the load or store instruction \p I should widened for \p
  /// Range.Start and potentially masked. Such instructions are handled by a
  /// recipe that takes an additional VPInstruction for the mask.
  VPWidenMemoryRecipe *tryToWidenMemory(Instruction *I,
                                        ArrayRef<VPValue *> Operands,
                                        VFRange &Range);

  /// Check if an induction recipe should be constructed for \p Phi. If so build
  /// and return it. If not, return null.
  VPHeaderPHIRecipe *tryToOptimizeInductionPHI(PHINode *Phi,
                                               ArrayRef<VPValue *> Operands,
                                               VFRange &Range);

  /// Optimize the special case where the operand of \p I is a constant integer
  /// induction variable.
  VPWidenIntOrFpInductionRecipe *
  tryToOptimizeInductionTruncate(TruncInst *I, ArrayRef<VPValue *> Operands,
                                 VFRange &Range);

  /// Handle call instructions. If \p CI can be widened for \p Range.Start,
  /// return a new VPWidenCallRecipe or VPWidenIntrinsicRecipe. Range.End may be
  /// decreased to ensure same decision from \p Range.Start to \p Range.End.
  VPSingleDefRecipe *tryToWidenCall(CallInst *CI, ArrayRef<VPValue *> Operands,
                                    VFRange &Range);

  /// Check if \p I has an opcode that can be widened and return a VPWidenRecipe
  /// if it can. The function should only be called if the cost-model indicates
  /// that widening should be performed.
  VPWidenRecipe *tryToWiden(Instruction *I, ArrayRef<VPValue *> Operands);

  /// Makes Histogram count operations safe for vectorization, by emitting a
  /// llvm.experimental.vector.histogram.add intrinsic in place of the
  /// Load + Add|Sub + Store operations that perform the histogram in the
  /// original scalar loop.
  VPHistogramRecipe *tryToWidenHistogram(const HistogramInfo *HI,
                                         ArrayRef<VPValue *> Operands);

  /// Examines reduction operations to see if the target can use a cheaper
  /// operation with a wider per-iteration input VF and narrower PHI VF.
  /// Each element within Chains is a pair with a struct containing reduction
  /// information and the scaling factor between the number of elements in
  /// the input and output.
  /// Recursively calls itself to identify chained scaled reductions.
  /// Returns true if this invocation added an entry to Chains, otherwise false.
  /// i.e. returns false in the case that a subcall adds an entry to Chains,
  /// but the top-level call does not.
  bool getScaledReductions(
      Instruction *PHI, Instruction *RdxExitInstr, VFRange &Range,
      SmallVectorImpl<std::pair<PartialReductionChain, unsigned>> &Chains);

public:
  VPRecipeBuilder(VPlan &Plan, Loop *OrigLoop, const TargetLibraryInfo *TLI,
                  const TargetTransformInfo *TTI,
                  LoopVectorizationLegality *Legal,
                  LoopVectorizationCostModel &CM,
                  PredicatedScalarEvolution &PSE, VPBuilder &Builder,
                  DenseMap<VPBasicBlock *, VPValue *> &BlockMaskCache,
                  LoopVersioning *LVer)
      : Plan(Plan), OrigLoop(OrigLoop), TLI(TLI), TTI(TTI), Legal(Legal),
        CM(CM), PSE(PSE), Builder(Builder), BlockMaskCache(BlockMaskCache),
        LVer(LVer) {}

  std::optional<unsigned> getScalingForReduction(const Instruction *ExitInst) {
    auto It = ScaledReductionMap.find(ExitInst);
    return It == ScaledReductionMap.end() ? std::nullopt
                                          : std::make_optional(It->second);
  }

  /// Find all possible partial reductions in the loop and track all of those
  /// that are valid so recipes can be formed later.
  void collectScaledReductions(VFRange &Range);

  /// Create and return a widened recipe for \p R if one can be created within
  /// the given VF \p Range.
  VPRecipeBase *tryToCreateWidenRecipe(VPSingleDefRecipe *R, VFRange &Range);

  /// Create and return a partial reduction recipe for a reduction instruction
  /// along with binary operation and reduction phi operands.
  VPRecipeBase *tryToCreatePartialReduction(Instruction *Reduction,
                                            ArrayRef<VPValue *> Operands,
                                            unsigned ScaleFactor);

  /// Set the recipe created for given ingredient.
  void setRecipe(Instruction *I, VPRecipeBase *R) {
    assert(!Ingredient2Recipe.contains(I) &&
           "Cannot reset recipe for instruction.");
    Ingredient2Recipe[I] = R;
  }

  /// Returns the *entry* mask for block \p VPBB or null if the mask is
  /// all-true.
  VPValue *getBlockInMask(VPBasicBlock *VPBB) const {
    return BlockMaskCache.lookup(VPBB);
  }

  /// Return the recipe created for given ingredient.
  VPRecipeBase *getRecipe(Instruction *I) {
    assert(Ingredient2Recipe.count(I) &&
           "Recording this ingredients recipe was not requested");
    assert(Ingredient2Recipe[I] != nullptr &&
           "Ingredient doesn't have a recipe");
    return Ingredient2Recipe[I];
  }

  /// Build a VPReplicationRecipe for \p I using \p Operands. If it is
  /// predicated, add the mask as last operand. Range.End may be decreased to
  /// ensure same recipe behavior from \p Range.Start to \p Range.End.
  VPReplicateRecipe *handleReplication(Instruction *I,
                                       ArrayRef<VPValue *> Operands,
                                       VFRange &Range);

  VPValue *getVPValueOrAddLiveIn(Value *V) {
    if (auto *I = dyn_cast<Instruction>(V)) {
      if (auto *R = Ingredient2Recipe.lookup(I))
        return R->getVPSingleValue();
    }
    return Plan.getOrAddLiveIn(V);
  }

  void updateBlockMaskCache(DenseMap<VPValue *, VPValue *> &Old2New) {
    for (auto &[_, V] : BlockMaskCache) {
      if (auto *New = Old2New.lookup(V)) {
        V->replaceAllUsesWith(New);
        V = New;
      }
    }
  }
};
} // end namespace llvm

#endif // LLVM_TRANSFORMS_VECTORIZE_VPRECIPEBUILDER_H