1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336
|
// Copyright (c) 2018 Google LLC.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef SOURCE_OPT_LOOP_PEELING_H_
#define SOURCE_OPT_LOOP_PEELING_H_
#include <algorithm>
#include <limits>
#include <memory>
#include <tuple>
#include <unordered_map>
#include <unordered_set>
#include <utility>
#include <vector>
#include "source/opt/ir_context.h"
#include "source/opt/loop_descriptor.h"
#include "source/opt/loop_utils.h"
#include "source/opt/pass.h"
#include "source/opt/scalar_analysis.h"
namespace spvtools {
namespace opt {
// Utility class to perform the peeling of a given loop.
// The loop peeling transformation make a certain amount of a loop iterations to
// be executed either before (peel before) or after (peel after) the transformed
// loop.
//
// For peeling cases the transformation does the following steps:
// - It clones the loop and inserts the cloned loop before the original loop;
// - It connects all iterating values of the cloned loop with the
// corresponding original loop values so that the second loop starts with
// the appropriate values.
// - It inserts a new induction variable "i" is inserted into the cloned that
// starts with the value 0 and increment by step of one.
//
// The last step is specific to each case:
// - Peel before: the transformation is to peel the "N" first iterations.
// The exit condition of the cloned loop is changed so that the loop
// exits when "i < N" becomes false. The original loop is then protected to
// only execute if there is any iteration left to do.
// - Peel after: the transformation is to peel the "N" last iterations,
// then the exit condition of the cloned loop is changed so that the loop
// exits when "i + N < max_iteration" becomes false, where "max_iteration"
// is the upper bound of the loop. The cloned loop is then protected to
// only execute if there is any iteration left to do no covered by the
// second.
//
// To be peelable:
// - The loop must be in LCSSA form;
// - The loop must not contain any breaks;
// - The loop must not have any ambiguous iterators updates (see
// "CanPeelLoop").
// The method "CanPeelLoop" checks that those constrained are met.
class LoopPeeling {
public:
// LoopPeeling constructor.
// |loop| is the loop to peel.
// |loop_iteration_count| is the instruction holding the |loop| iteration
// count, must be invariant for |loop| and must be of an int 32 type (signed
// or unsigned).
// |canonical_induction_variable| is an induction variable that can be used to
// count the number of iterations, must be of the same type as
// |loop_iteration_count| and start at 0 and increase by step of one at each
// iteration. The value nullptr is interpreted as no suitable variable exists
// and one will be created.
LoopPeeling(Loop* loop, Instruction* loop_iteration_count,
Instruction* canonical_induction_variable = nullptr)
: context_(loop->GetContext()),
loop_utils_(loop->GetContext(), loop),
loop_(loop),
loop_iteration_count_(!loop->IsInsideLoop(loop_iteration_count)
? loop_iteration_count
: nullptr),
int_type_(nullptr),
original_loop_canonical_induction_variable_(
canonical_induction_variable),
canonical_induction_variable_(nullptr) {
if (loop_iteration_count_) {
int_type_ = context_->get_type_mgr()
->GetType(loop_iteration_count_->type_id())
->AsInteger();
if (canonical_induction_variable_) {
assert(canonical_induction_variable_->type_id() ==
loop_iteration_count_->type_id() &&
"loop_iteration_count and canonical_induction_variable do not "
"have the same type");
}
}
GetIteratingExitValues();
}
// Returns true if the loop can be peeled.
// To be peelable, all operation involved in the update of the loop iterators
// must not dominates the exit condition. This restriction is a work around to
// not miss compile code like:
//
// for (int i = 0; i + 1 < N; i++) {}
// for (int i = 0; ++i < N; i++) {}
//
// The increment will happen before the test on the exit condition leading to
// very look-a-like code.
//
// This restriction will not apply if a loop rotate is applied before (i.e.
// becomes a do-while loop).
bool CanPeelLoop() const {
CFG& cfg = *context_->cfg();
if (!loop_iteration_count_) {
return false;
}
if (!int_type_) {
return false;
}
if (int_type_->width() != 32) {
return false;
}
if (!loop_->IsLCSSA()) {
return false;
}
if (!loop_->GetMergeBlock()) {
return false;
}
if (cfg.preds(loop_->GetMergeBlock()->id()).size() != 1) {
return false;
}
if (!IsConditionCheckSideEffectFree()) {
return false;
}
return !std::any_of(exit_value_.cbegin(), exit_value_.cend(),
[](std::pair<uint32_t, Instruction*> it) {
return it.second == nullptr;
});
}
// Moves the execution of the |factor| first iterations of the loop into a
// dedicated loop.
void PeelBefore(uint32_t factor);
// Moves the execution of the |factor| last iterations of the loop into a
// dedicated loop.
void PeelAfter(uint32_t factor);
// Returns the cloned loop.
Loop* GetClonedLoop() { return cloned_loop_; }
// Returns the original loop.
Loop* GetOriginalLoop() { return loop_; }
private:
IRContext* context_;
LoopUtils loop_utils_;
// The original loop.
Loop* loop_;
// The initial |loop_| upper bound.
Instruction* loop_iteration_count_;
// The int type to use for the canonical_induction_variable_.
analysis::Integer* int_type_;
// The cloned loop.
Loop* cloned_loop_;
// This is set to true when the exit and back-edge branch instruction is the
// same.
bool do_while_form_;
// The canonical induction variable from the original loop if it exists.
Instruction* original_loop_canonical_induction_variable_;
// The canonical induction variable of the cloned loop. The induction variable
// is initialized to 0 and incremented by step of 1.
Instruction* canonical_induction_variable_;
// Map between loop iterators and exit values. Loop iterators
std::unordered_map<uint32_t, Instruction*> exit_value_;
// Duplicate |loop_| and place the new loop before the cloned loop. Iterating
// values from the cloned loop are then connected to the original loop as
// initializer.
void DuplicateAndConnectLoop(LoopUtils::LoopCloningResult* clone_results);
// Insert the canonical induction variable into the first loop as a simplified
// counter.
void InsertCanonicalInductionVariable(
LoopUtils::LoopCloningResult* clone_results);
// Fixes the exit condition of the before loop. The function calls
// |condition_builder| to get the condition to use in the conditional branch
// of the loop exit. The loop will be exited if the condition evaluate to
// true. |condition_builder| takes an Instruction* that represent the
// insertion point.
void FixExitCondition(
const std::function<uint32_t(Instruction*)>& condition_builder);
// Gathers all operations involved in the update of |iterator| into
// |operations|.
void GetIteratorUpdateOperations(
const Loop* loop, Instruction* iterator,
std::unordered_set<Instruction*>* operations);
// Gathers exiting iterator values. The function builds a map between each
// iterating value in the loop (a phi instruction in the loop header) and its
// SSA value when it exit the loop. If no exit value can be accurately found,
// it is map to nullptr (see comment on CanPeelLoop).
void GetIteratingExitValues();
// Returns true if a for-loop has no instruction with effects before the
// condition check.
bool IsConditionCheckSideEffectFree() const;
// Creates a new basic block and insert it between |bb| and the predecessor of
// |bb|.
BasicBlock* CreateBlockBefore(BasicBlock* bb);
// Inserts code to only execute |loop| only if the given |condition| is true.
// |if_merge| is a suitable basic block to be used by the if condition as
// merge block.
// The function returns the if block protecting the loop.
BasicBlock* ProtectLoop(Loop* loop, Instruction* condition,
BasicBlock* if_merge);
};
// Implements a loop peeling optimization.
// For each loop, the pass will try to peel it if there is conditions that
// are true for the "N" first or last iterations of the loop.
// To avoid code size explosion, too large loops will not be peeled.
class LoopPeelingPass : public Pass {
public:
// Describes the peeling direction.
enum class PeelDirection {
kNone, // Cannot peel
kBefore, // Can peel before
kAfter // Can peel last
};
// Holds some statistics about peeled function.
struct LoopPeelingStats {
std::vector<std::tuple<const Loop*, PeelDirection, uint32_t>> peeled_loops_;
};
LoopPeelingPass(LoopPeelingStats* stats = nullptr) : stats_(stats) {}
// Sets the loop peeling growth threshold. If the code size increase is above
// |code_grow_threshold|, the loop will not be peeled. The code size is
// measured in terms of SPIR-V instructions.
static void SetLoopPeelingThreshold(size_t code_grow_threshold) {
code_grow_threshold_ = code_grow_threshold;
}
// Returns the loop peeling code growth threshold.
static size_t GetLoopPeelingThreshold() { return code_grow_threshold_; }
const char* name() const override { return "loop-peeling"; }
// Processes the given |module|. Returns Status::Failure if errors occur when
// processing. Returns the corresponding Status::Success if processing is
// succesful to indicate whether changes have been made to the modue.
Pass::Status Process() override;
private:
// Describes the peeling direction.
enum class CmpOperator {
kLT, // less than
kGT, // greater than
kLE, // less than or equal
kGE, // greater than or equal
};
class LoopPeelingInfo {
public:
using Direction = std::pair<PeelDirection, uint32_t>;
LoopPeelingInfo(Loop* loop, size_t loop_max_iterations,
ScalarEvolutionAnalysis* scev_analysis)
: context_(loop->GetContext()),
loop_(loop),
scev_analysis_(scev_analysis),
loop_max_iterations_(loop_max_iterations) {}
// Returns by how much and to which direction a loop should be peeled to
// make the conditional branch of the basic block |bb| an unconditional
// branch. If |bb|'s terminator is not a conditional branch or the condition
// is not workable then it returns PeelDirection::kNone and a 0 factor.
Direction GetPeelingInfo(BasicBlock* bb) const;
private:
// Returns the id of the loop invariant operand of the conditional
// expression |condition|. It returns if no operand is invariant.
uint32_t GetFirstLoopInvariantOperand(Instruction* condition) const;
// Returns the id of the non loop invariant operand of the conditional
// expression |condition|. It returns if all operands are invariant.
uint32_t GetFirstNonLoopInvariantOperand(Instruction* condition) const;
// Returns the value of |rec| at the first loop iteration.
SExpression GetValueAtFirstIteration(SERecurrentNode* rec) const;
// Returns the value of |rec| at the given |iteration|.
SExpression GetValueAtIteration(SERecurrentNode* rec,
int64_t iteration) const;
// Returns the value of |rec| at the last loop iteration.
SExpression GetValueAtLastIteration(SERecurrentNode* rec) const;
bool EvalOperator(CmpOperator cmp_op, SExpression lhs, SExpression rhs,
bool* result) const;
Direction HandleEquality(SExpression lhs, SExpression rhs) const;
Direction HandleInequality(CmpOperator cmp_op, SExpression lhs,
SERecurrentNode* rhs) const;
static Direction GetNoneDirection() {
return Direction{LoopPeelingPass::PeelDirection::kNone, 0};
}
IRContext* context_;
Loop* loop_;
ScalarEvolutionAnalysis* scev_analysis_;
size_t loop_max_iterations_;
};
// Peel profitable loops in |f|.
bool ProcessFunction(Function* f);
// Peel |loop| if profitable.
std::pair<bool, Loop*> ProcessLoop(Loop* loop, CodeMetrics* loop_size);
static size_t code_grow_threshold_;
LoopPeelingStats* stats_;
};
} // namespace opt
} // namespace spvtools
#endif // SOURCE_OPT_LOOP_PEELING_H_
|