File: VPlanHelpers.h

package info (click to toggle)
llvm-toolchain-21 1%3A21.1.6-3
  • links: PTS, VCS
  • area: main
  • in suites: sid
  • size: 2,245,028 kB
  • sloc: cpp: 7,619,726; ansic: 1,434,018; asm: 1,058,748; python: 252,740; f90: 94,671; objc: 70,685; lisp: 42,813; pascal: 18,401; sh: 8,601; ml: 5,111; perl: 4,720; makefile: 3,675; awk: 3,523; javascript: 2,409; xml: 892; fortran: 770
file content (466 lines) | stat: -rw-r--r-- 16,749 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
//===- VPlanHelpers.h - VPlan-related auxiliary helpers -------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This file contains the declarations of different VPlan-related auxiliary
/// helpers.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_TRANSFORMS_VECTORIZE_VPLANHELPERS_H
#define LLVM_TRANSFORMS_VECTORIZE_VPLANHELPERS_H

#include "VPlanAnalysis.h"
#include "VPlanDominatorTree.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/DomTreeUpdater.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/ModuleSlotTracker.h"
#include "llvm/Support/InstructionCost.h"

namespace llvm {

class AssumptionCache;
class BasicBlock;
class DominatorTree;
class InnerLoopVectorizer;
class IRBuilderBase;
class LoopInfo;
class SCEV;
class Type;
class VPBasicBlock;
class VPRegionBlock;
class VPlan;
class Value;

/// Returns a calculation for the total number of elements for a given \p VF.
/// For fixed width vectors this value is a constant, whereas for scalable
/// vectors it is an expression determined at runtime.
Value *getRuntimeVF(IRBuilderBase &B, Type *Ty, ElementCount VF);

/// Return a value for Step multiplied by VF.
Value *createStepForVF(IRBuilderBase &B, Type *Ty, ElementCount VF,
                       int64_t Step);

/// A helper function that returns how much we should divide the cost of a
/// predicated block by. Typically this is the reciprocal of the block
/// probability, i.e. if we return X we are assuming the predicated block will
/// execute once for every X iterations of the loop header so the block should
/// only contribute 1/X of its cost to the total cost calculation, but when
/// optimizing for code size it will just be 1 as code size costs don't depend
/// on execution probabilities.
///
/// TODO: We should use actual block probability here, if available. Currently,
///       we always assume predicated blocks have a 50% chance of executing.
inline unsigned
getPredBlockCostDivisor(TargetTransformInfo::TargetCostKind CostKind) {
  return CostKind == TTI::TCK_CodeSize ? 1 : 2;
}

/// A range of powers-of-2 vectorization factors with fixed start and
/// adjustable end. The range includes start and excludes end, e.g.,:
/// [1, 16) = {1, 2, 4, 8}
struct VFRange {
  // A power of 2.
  const ElementCount Start;

  // A power of 2. If End <= Start range is empty.
  ElementCount End;

  bool isEmpty() const {
    return End.getKnownMinValue() <= Start.getKnownMinValue();
  }

  VFRange(const ElementCount &Start, const ElementCount &End)
      : Start(Start), End(End) {
    assert(Start.isScalable() == End.isScalable() &&
           "Both Start and End should have the same scalable flag");
    assert(isPowerOf2_32(Start.getKnownMinValue()) &&
           "Expected Start to be a power of 2");
    assert(isPowerOf2_32(End.getKnownMinValue()) &&
           "Expected End to be a power of 2");
  }

  /// Iterator to iterate over vectorization factors in a VFRange.
  class iterator
      : public iterator_facade_base<iterator, std::forward_iterator_tag,
                                    ElementCount> {
    ElementCount VF;

  public:
    iterator(ElementCount VF) : VF(VF) {}

    bool operator==(const iterator &Other) const { return VF == Other.VF; }

    ElementCount operator*() const { return VF; }

    iterator &operator++() {
      VF *= 2;
      return *this;
    }
  };

  iterator begin() { return iterator(Start); }
  iterator end() {
    assert(isPowerOf2_32(End.getKnownMinValue()));
    return iterator(End);
  }
};

/// In what follows, the term "input IR" refers to code that is fed into the
/// vectorizer whereas the term "output IR" refers to code that is generated by
/// the vectorizer.

/// VPLane provides a way to access lanes in both fixed width and scalable
/// vectors, where for the latter the lane index sometimes needs calculating
/// as a runtime expression.
class VPLane {
public:
  /// Kind describes how to interpret Lane.
  enum class Kind : uint8_t {
    /// For First, Lane is the index into the first N elements of a
    /// fixed-vector <N x <ElTy>> or a scalable vector <vscale x N x <ElTy>>.
    First,
    /// For ScalableLast, Lane is the offset from the start of the last
    /// N-element subvector in a scalable vector <vscale x N x <ElTy>>. For
    /// example, a Lane of 0 corresponds to lane `(vscale - 1) * N`, a Lane of
    /// 1 corresponds to `((vscale - 1) * N) + 1`, etc.
    ScalableLast
  };

private:
  /// in [0..VF)
  unsigned Lane;

  /// Indicates how the Lane should be interpreted, as described above.
  Kind LaneKind = Kind::First;

public:
  VPLane(unsigned Lane) : Lane(Lane) {}
  VPLane(unsigned Lane, Kind LaneKind) : Lane(Lane), LaneKind(LaneKind) {}

  static VPLane getFirstLane() { return VPLane(0, VPLane::Kind::First); }

  static VPLane getLaneFromEnd(const ElementCount &VF, unsigned Offset) {
    assert(Offset > 0 && Offset <= VF.getKnownMinValue() &&
           "trying to extract with invalid offset");
    unsigned LaneOffset = VF.getKnownMinValue() - Offset;
    Kind LaneKind;
    if (VF.isScalable())
      // In this case 'LaneOffset' refers to the offset from the start of the
      // last subvector with VF.getKnownMinValue() elements.
      LaneKind = VPLane::Kind::ScalableLast;
    else
      LaneKind = VPLane::Kind::First;
    return VPLane(LaneOffset, LaneKind);
  }

  static VPLane getLastLaneForVF(const ElementCount &VF) {
    return getLaneFromEnd(VF, 1);
  }

  /// Returns a compile-time known value for the lane index and asserts if the
  /// lane can only be calculated at runtime.
  unsigned getKnownLane() const {
    assert(LaneKind == Kind::First &&
           "can only get known lane from the beginning");
    return Lane;
  }

  /// Returns an expression describing the lane index that can be used at
  /// runtime.
  Value *getAsRuntimeExpr(IRBuilderBase &Builder, const ElementCount &VF) const;

  /// Returns the Kind of lane offset.
  Kind getKind() const { return LaneKind; }

  /// Returns true if this is the first lane of the whole vector.
  bool isFirstLane() const { return Lane == 0 && LaneKind == Kind::First; }

  /// Maps the lane to a cache index based on \p VF.
  unsigned mapToCacheIndex(const ElementCount &VF) const {
    switch (LaneKind) {
    case VPLane::Kind::ScalableLast:
      assert(VF.isScalable() && Lane < VF.getKnownMinValue() &&
             "ScalableLast can only be used with scalable VFs");
      return VF.getKnownMinValue() + Lane;
    default:
      assert(Lane < VF.getKnownMinValue() &&
             "Cannot extract lane larger than VF");
      return Lane;
    }
  }
};

/// VPTransformState holds information passed down when "executing" a VPlan,
/// needed for generating the output IR.
struct VPTransformState {
  VPTransformState(const TargetTransformInfo *TTI, ElementCount VF,
                   LoopInfo *LI, DominatorTree *DT, AssumptionCache *AC,
                   IRBuilderBase &Builder, VPlan *Plan, Loop *CurrentParentLoop,
                   Type *CanonicalIVTy);
  /// Target Transform Info.
  const TargetTransformInfo *TTI;

  /// The chosen Vectorization Factor of the loop being vectorized.
  ElementCount VF;

  /// Hold the index to generate specific scalar instructions. Null indicates
  /// that all instances are to be generated, using either scalar or vector
  /// instructions.
  std::optional<VPLane> Lane;

  struct DataState {
    // Each value from the original loop, when vectorized, is represented by a
    // vector value in the map.
    DenseMap<const VPValue *, Value *> VPV2Vector;

    DenseMap<const VPValue *, SmallVector<Value *, 4>> VPV2Scalars;
  } Data;

  /// Get the generated vector Value for a given VPValue \p Def if \p IsScalar
  /// is false, otherwise return the generated scalar. \See set.
  Value *get(const VPValue *Def, bool IsScalar = false);

  /// Get the generated Value for a given VPValue and given Part and Lane.
  Value *get(const VPValue *Def, const VPLane &Lane);

  bool hasVectorValue(const VPValue *Def) {
    return Data.VPV2Vector.contains(Def);
  }

  bool hasScalarValue(const VPValue *Def, VPLane Lane) {
    auto I = Data.VPV2Scalars.find(Def);
    if (I == Data.VPV2Scalars.end())
      return false;
    unsigned CacheIdx = Lane.mapToCacheIndex(VF);
    return CacheIdx < I->second.size() && I->second[CacheIdx];
  }

  /// Set the generated vector Value for a given VPValue, if \p
  /// IsScalar is false. If \p IsScalar is true, set the scalar in lane 0.
  void set(const VPValue *Def, Value *V, bool IsScalar = false) {
    if (IsScalar) {
      set(Def, V, VPLane(0));
      return;
    }
    assert((VF.isScalar() || isVectorizedTy(V->getType())) &&
           "scalar values must be stored as (0, 0)");
    Data.VPV2Vector[Def] = V;
  }

  /// Reset an existing vector value for \p Def and a given \p Part.
  void reset(const VPValue *Def, Value *V) {
    assert(Data.VPV2Vector.contains(Def) && "need to overwrite existing value");
    Data.VPV2Vector[Def] = V;
  }

  /// Set the generated scalar \p V for \p Def and the given \p Lane.
  void set(const VPValue *Def, Value *V, const VPLane &Lane) {
    auto &Scalars = Data.VPV2Scalars[Def];
    unsigned CacheIdx = Lane.mapToCacheIndex(VF);
    if (Scalars.size() <= CacheIdx)
      Scalars.resize(CacheIdx + 1);
    assert(!Scalars[CacheIdx] && "should overwrite existing value");
    Scalars[CacheIdx] = V;
  }

  /// Reset an existing scalar value for \p Def and a given \p Lane.
  void reset(const VPValue *Def, Value *V, const VPLane &Lane) {
    auto Iter = Data.VPV2Scalars.find(Def);
    assert(Iter != Data.VPV2Scalars.end() &&
           "need to overwrite existing value");
    unsigned CacheIdx = Lane.mapToCacheIndex(VF);
    assert(CacheIdx < Iter->second.size() &&
           "need to overwrite existing value");
    Iter->second[CacheIdx] = V;
  }

  /// Set the debug location in the builder using the debug location \p DL.
  void setDebugLocFrom(DebugLoc DL);

  /// Insert the scalar value of \p Def at \p Lane into \p Lane of \p WideValue
  /// and return the resulting value.
  Value *packScalarIntoVectorizedValue(const VPValue *Def, Value *WideValue,
                                       const VPLane &Lane);

  /// Hold state information used when constructing the CFG of the output IR,
  /// traversing the VPBasicBlocks and generating corresponding IR BasicBlocks.
  struct CFGState {
    /// The previous VPBasicBlock visited. Initially set to null.
    VPBasicBlock *PrevVPBB = nullptr;

    /// The previous IR BasicBlock created or used. Initially set to the new
    /// header BasicBlock.
    BasicBlock *PrevBB = nullptr;

    /// The last IR BasicBlock in the output IR. Set to the exit block of the
    /// vector loop.
    BasicBlock *ExitBB = nullptr;

    /// A mapping of each VPBasicBlock to the corresponding BasicBlock. In case
    /// of replication, maps the BasicBlock of the last replica created.
    SmallDenseMap<const VPBasicBlock *, BasicBlock *> VPBB2IRBB;

    /// Updater for the DominatorTree.
    DomTreeUpdater DTU;

    CFGState(DominatorTree *DT)
        : DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy) {}
  } CFG;

  /// Hold a pointer to LoopInfo to register new basic blocks in the loop.
  LoopInfo *LI;

  /// Hold a pointer to AssumptionCache to register new assumptions after
  /// replicating assume calls.
  AssumptionCache *AC;

  /// Hold a reference to the IRBuilder used to generate output IR code.
  IRBuilderBase &Builder;

  /// Pointer to the VPlan code is generated for.
  VPlan *Plan;

  /// The parent loop object for the current scope, or nullptr.
  Loop *CurrentParentLoop = nullptr;

  /// VPlan-based type analysis.
  VPTypeAnalysis TypeAnalysis;

  /// VPlan-based dominator tree.
  VPDominatorTree VPDT;
};

/// Struct to hold various analysis needed for cost computations.
struct VPCostContext {
  const TargetTransformInfo &TTI;
  const TargetLibraryInfo &TLI;
  VPTypeAnalysis Types;
  LLVMContext &LLVMCtx;
  LoopVectorizationCostModel &CM;
  SmallPtrSet<Instruction *, 8> SkipCostComputation;
  TargetTransformInfo::TargetCostKind CostKind;

  VPCostContext(const TargetTransformInfo &TTI, const TargetLibraryInfo &TLI,
                Type *CanIVTy, LoopVectorizationCostModel &CM,
                TargetTransformInfo::TargetCostKind CostKind)
      : TTI(TTI), TLI(TLI), Types(CanIVTy), LLVMCtx(CanIVTy->getContext()),
        CM(CM), CostKind(CostKind) {}

  /// Return the cost for \p UI with \p VF using the legacy cost model as
  /// fallback until computing the cost of all recipes migrates to VPlan.
  InstructionCost getLegacyCost(Instruction *UI, ElementCount VF) const;

  /// Return true if the cost for \p UI shouldn't be computed, e.g. because it
  /// has already been pre-computed.
  bool skipCostComputation(Instruction *UI, bool IsVector) const;

  /// Returns the OperandInfo for \p V, if it is a live-in.
  TargetTransformInfo::OperandValueInfo getOperandInfo(VPValue *V) const;

  /// Return true if \p I is considered uniform-after-vectorization in the
  /// legacy cost model for \p VF. Only used to check for additional VPlan
  /// simplifications.
  bool isLegacyUniformAfterVectorization(Instruction *I, ElementCount VF) const;
};

/// This class can be used to assign names to VPValues. For VPValues without
/// underlying value, assign consecutive numbers and use those as names (wrapped
/// in vp<>). Otherwise, use the name from the underlying value (wrapped in
/// ir<>), appending a .V version number if there are multiple uses of the same
/// name. Allows querying names for VPValues for printing, similar to the
/// ModuleSlotTracker for IR values.
class VPSlotTracker {
  /// Keep track of versioned names assigned to VPValues with underlying IR
  /// values.
  DenseMap<const VPValue *, std::string> VPValue2Name;
  /// Keep track of the next number to use to version the base name.
  StringMap<unsigned> BaseName2Version;

  /// Number to assign to the next VPValue without underlying value.
  unsigned NextSlot = 0;

  /// Lazily created ModuleSlotTracker, used only when unnamed IR instructions
  /// require slot tracking.
  std::unique_ptr<ModuleSlotTracker> MST;

  void assignName(const VPValue *V);
  void assignNames(const VPlan &Plan);
  void assignNames(const VPBasicBlock *VPBB);
  std::string getName(const Value *V);

public:
  VPSlotTracker(const VPlan *Plan = nullptr) {
    if (Plan)
      assignNames(*Plan);
  }

  /// Returns the name assigned to \p V, if there is one, otherwise try to
  /// construct one from the underlying value, if there's one; else return
  /// <badref>.
  std::string getOrCreateName(const VPValue *V) const;
};

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// VPlanPrinter prints a given VPlan to a given output stream. The printing is
/// indented and follows the dot format.
class VPlanPrinter {
  raw_ostream &OS;
  const VPlan &Plan;
  unsigned Depth = 0;
  unsigned TabWidth = 2;
  std::string Indent;
  unsigned BID = 0;
  SmallDenseMap<const VPBlockBase *, unsigned> BlockID;

  VPSlotTracker SlotTracker;

  /// Handle indentation.
  void bumpIndent(int b) { Indent = std::string((Depth += b) * TabWidth, ' '); }

  /// Print a given \p Block of the Plan.
  void dumpBlock(const VPBlockBase *Block);

  /// Print the information related to the CFG edges going out of a given
  /// \p Block, followed by printing the successor blocks themselves.
  void dumpEdges(const VPBlockBase *Block);

  /// Print a given \p BasicBlock, including its VPRecipes, followed by printing
  /// its successor blocks.
  void dumpBasicBlock(const VPBasicBlock *BasicBlock);

  /// Print a given \p Region of the Plan.
  void dumpRegion(const VPRegionBlock *Region);

  unsigned getOrCreateBID(const VPBlockBase *Block) {
    return BlockID.count(Block) ? BlockID[Block] : BlockID[Block] = BID++;
  }

  Twine getOrCreateName(const VPBlockBase *Block);

  Twine getUID(const VPBlockBase *Block);

  /// Print the information related to a CFG edge between two VPBlockBases.
  void drawEdge(const VPBlockBase *From, const VPBlockBase *To, bool Hidden,
                const Twine &Label);

public:
  VPlanPrinter(raw_ostream &O, const VPlan &P)
      : OS(O), Plan(P), SlotTracker(&P) {}

  LLVM_DUMP_METHOD void dump();
};
#endif

} // end namespace llvm

#endif // LLVM_TRANSFORMS_VECTORIZE_VPLAN_H