/*
* Copyright (C) 2020 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef ART_COMPILER_OPTIMIZING_EXECUTION_SUBGRAPH_H_
#define ART_COMPILER_OPTIMIZING_EXECUTION_SUBGRAPH_H_
#include <algorithm>
#include <bitset>
#include <optional>
#include <sstream>
#include "base/arena_allocator.h"
#include "base/arena_bit_vector.h"
#include "base/arena_containers.h"
#include "base/array_ref.h"
#include "base/bit_vector-inl.h"
#include "base/globals.h"
#include "base/iteration_range.h"
#include "base/macros.h"
#include "base/mutex.h"
#include "base/scoped_arena_allocator.h"
#include "base/scoped_arena_containers.h"
#include "base/stl_util.h"
#include "base/transform_iterator.h"
#include "nodes.h"
namespace art HIDDEN {
// Helper for transforming blocks to block_ids.
class BlockToBlockIdTransformer {
public:
BlockToBlockIdTransformer(BlockToBlockIdTransformer&&) = default;
BlockToBlockIdTransformer(const BlockToBlockIdTransformer&) = default;
BlockToBlockIdTransformer() {}
inline uint32_t operator()(const HBasicBlock* b) const {
return b->GetBlockId();
}
};
// Helper for transforming block ids to blocks.
class BlockIdToBlockTransformer {
public:
BlockIdToBlockTransformer(BlockIdToBlockTransformer&&) = default;
BlockIdToBlockTransformer(const BlockIdToBlockTransformer&) = default;
explicit BlockIdToBlockTransformer(const HGraph* graph) : graph_(graph) {}
inline const HGraph* GetGraph() const {
return graph_;
}
inline HBasicBlock* GetBlock(uint32_t id) const {
DCHECK_LT(id, graph_->GetBlocks().size()) << graph_->PrettyMethod();
HBasicBlock* blk = graph_->GetBlocks()[id];
DCHECK(blk != nullptr);
return blk;
}
inline HBasicBlock* operator()(uint32_t id) const {
return GetBlock(id);
}
private:
const HGraph* const graph_;
};
class BlockIdFilterThunk {
public:
explicit BlockIdFilterThunk(const BitVector& i) : inner_(i) {}
BlockIdFilterThunk(BlockIdFilterThunk&& other) noexcept = default;
BlockIdFilterThunk(const BlockIdFilterThunk&) = default;
bool operator()(const HBasicBlock* b) const {
return inner_.IsBitSet(b->GetBlockId());
}
private:
const BitVector& inner_;
};
// A representation of a particular section of the graph. The graph is split
// into an excluded and an included area; this split is used to track escapes.
//
// This object is a view of the graph and is not updated as the graph is
// changed.
//
// This is implemented by removing various escape points from the subgraph using
// the 'RemoveBlock' function. Once all required blocks are removed one will
// 'Finalize' the subgraph. This will extend the removed area to include:
// (1) Any block which inevitably leads to (is post-dominated by) a removed block
// (2) Any block which is between two removed blocks
//
// This allows us to create a set of 'ExcludedCohorts' which are the
// well-connected subsets of the graph made up of removed blocks. These cohorts
// have a set of entry and exit blocks which act as the boundary of the cohort.
// Since we removed the blocks between two excluded blocks, it is impossible for any
// cohort-exit block to reach any cohort-entry block. This means we can use the
// boundary between the cohort and the rest of the graph to insert
// materialization blocks for partial LSE.
//
// TODO We really should expand this to take into account where the object
// allocation takes place directly. Currently we always act as though it were
// allocated in the entry block. This is a massively simplifying assumption but
// means we can't partially remove objects that are repeatedly allocated in a
// loop.
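//
// A minimal usage sketch (the local names 'graph', 'allocator', and
// 'escape_blocks' below are hypothetical, not part of this API):
//
//   if (ExecutionSubgraph::CanAnalyse(graph)) {
//     ExecutionSubgraph subgraph(graph, allocator);
//     for (HBasicBlock* escape : escape_blocks) {  // Blocks where the value escapes.
//       subgraph.RemoveBlock(escape);
//     }
//     subgraph.Finalize();
//     if (subgraph.IsValid()) {
//       for (const ExecutionSubgraph::ExcludedCohort& cohort : subgraph.GetExcludedCohorts()) {
//         // Materialization code for partial LSE is inserted on the boundary
//         // given by cohort.EntryBlocks() and cohort.ExitBlocks().
//       }
//     }
//   }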
class ExecutionSubgraph : public DeletableArenaObject<kArenaAllocLSA> {
public:
using BitVecBlockRange =
IterationRange<TransformIterator<BitVector::IndexIterator, BlockIdToBlockTransformer>>;
using FilteredBitVecBlockRange = IterationRange<
FilterIterator<ArenaVector<HBasicBlock*>::const_iterator, BlockIdFilterThunk>>;
// A well-connected set of blocks which have been removed from the
// ExecutionSubgraph. See the comment above for an explanation.
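//
// Illustrative sketch (a hypothetical CFG; the block names below are not part
// of this API):
//
//        B1
//       /  \
//      B2   B3
//      |    |
//      B4   |
//       \  /
//        B5
//
// If B2 and B4 are removed, {B2, B4} forms one cohort. Its entry block is B2
// (predecessor B1 is outside the cohort) and its exit block is B4 (successor
// B5 is outside the cohort). SucceedsBlock(B1) and PrecedesBlock(B5) return
// true, while both return false for the contained blocks B2 and B4.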
class ExcludedCohort : public ArenaObject<kArenaAllocLSA> {
public:
ExcludedCohort(ExcludedCohort&&) = default;
ExcludedCohort(const ExcludedCohort&) = delete;
explicit ExcludedCohort(ScopedArenaAllocator* allocator, HGraph* graph)
: graph_(graph),
entry_blocks_(allocator, graph_->GetBlocks().size(), false, kArenaAllocLSA),
exit_blocks_(allocator, graph_->GetBlocks().size(), false, kArenaAllocLSA),
blocks_(allocator, graph_->GetBlocks().size(), false, kArenaAllocLSA) {}
~ExcludedCohort() = default;
// All blocks in the cohort.
BitVecBlockRange Blocks() const {
return BlockIterRange(blocks_);
}
// Blocks that have predecessors outside of the cohort. These blocks will
// need to have PHIs/control-flow added to create the escaping value.
BitVecBlockRange EntryBlocks() const {
return BlockIterRange(entry_blocks_);
}
FilteredBitVecBlockRange EntryBlocksReversePostOrder() const {
return Filter(MakeIterationRange(graph_->GetReversePostOrder()),
BlockIdFilterThunk(entry_blocks_));
}
bool IsEntryBlock(const HBasicBlock* blk) const {
return entry_blocks_.IsBitSet(blk->GetBlockId());
}
// Blocks that have successors outside of the cohort. The successors of
// these blocks will need to have PHIs to restore state.
BitVecBlockRange ExitBlocks() const {
return BlockIterRange(exit_blocks_);
}
bool operator==(const ExcludedCohort& other) const {
return blocks_.Equal(&other.blocks_);
}
bool ContainsBlock(const HBasicBlock* blk) const {
return blocks_.IsBitSet(blk->GetBlockId());
}
// Returns true if there is a path from 'blk' to any block in this cohort.
// NB blocks contained within the cohort are not considered to be succeeded
// by the cohort (i.e. this function will return false).
bool SucceedsBlock(const HBasicBlock* blk) const {
if (ContainsBlock(blk)) {
return false;
}
auto idxs = entry_blocks_.Indexes();
return std::any_of(idxs.begin(), idxs.end(), [&](uint32_t entry) -> bool {
return blk->GetGraph()->PathBetween(blk->GetBlockId(), entry);
});
}
// Returns true if there is a path from any block in this cohort to 'blk'.
// NB blocks contained within the cohort are not considered to be preceded
// by the cohort (i.e. this function will return false).
bool PrecedesBlock(const HBasicBlock* blk) const {
if (ContainsBlock(blk)) {
return false;
}
auto idxs = exit_blocks_.Indexes();
return std::any_of(idxs.begin(), idxs.end(), [&](uint32_t exit) -> bool {
return blk->GetGraph()->PathBetween(exit, blk->GetBlockId());
});
}
void Dump(std::ostream& os) const;
private:
BitVecBlockRange BlockIterRange(const ArenaBitVector& bv) const {
auto indexes = bv.Indexes();
BitVecBlockRange res = MakeTransformRange(indexes, BlockIdToBlockTransformer(graph_));
return res;
}
ExcludedCohort() = delete;
HGraph* graph_;
ArenaBitVector entry_blocks_;
ArenaBitVector exit_blocks_;
ArenaBitVector blocks_;
friend class ExecutionSubgraph;
friend class LoadStoreAnalysisTest;
};
// The number of successors we can track on a single block. Graphs which
// contain a block with a branching factor greater than this will not be
// analysed. This is used to both limit the memory usage of analysis to
// reasonable levels and ensure that the analysis will complete in a
// reasonable amount of time. It also simplifies the implementation somewhat
// to have a constant branching factor.
static constexpr uint32_t kMaxFilterableSuccessors = 8;
// Instantiate a subgraph. The subgraph can be instantiated only if partial-escape
// analysis is desired (e.g. not when being used for instruction scheduling) and
// when the branching factor in the graph is not too high. These conditions
// are determined once and passed down for performance reasons.
ExecutionSubgraph(HGraph* graph, ScopedArenaAllocator* allocator);
void Invalidate() {
valid_ = false;
}
// A block is contained by the ExecutionSubgraph if it is reachable. This
// means it has not been removed explicitly or via pruning/concavity removal.
// The subgraph must be finalized before this function can be called.
// See RemoveConcavity and Prune for more information.
bool ContainsBlock(const HBasicBlock* blk) const {
DCHECK_IMPLIES(finalized_, !needs_prune_);
if (!valid_) {
return false;
}
return !unreachable_blocks_.IsBitSet(blk->GetBlockId());
}
// Mark the block as removed from the subgraph.
void RemoveBlock(const HBasicBlock* to_remove);
// Called when no more updates will be done to the subgraph. Calculates the
// final subgraph.
void Finalize() {
Prune();
RemoveConcavity();
finalized_ = true;
}
BitVecBlockRange UnreachableBlocks() const {
auto idxs = unreachable_blocks_.Indexes();
return MakeTransformRange(idxs, BlockIdToBlockTransformer(graph_));
}
// Returns true if all allowed execution paths from start eventually reach the
// graph's exit block (or diverge).
bool IsValid() const {
return valid_;
}
ArrayRef<const ExcludedCohort> GetExcludedCohorts() const {
DCHECK_IMPLIES(valid_, !needs_prune_);
if (!valid_ || !unreachable_blocks_.IsAnyBitSet()) {
return ArrayRef<const ExcludedCohort>();
} else {
return ArrayRef<const ExcludedCohort>(*excluded_list_);
}
}
// Helper class used to create the reachable-blocks iterator.
class ContainsFunctor {
public:
bool operator()(HBasicBlock* blk) const {
return subgraph_->ContainsBlock(blk);
}
private:
explicit ContainsFunctor(const ExecutionSubgraph* subgraph) : subgraph_(subgraph) {}
const ExecutionSubgraph* const subgraph_;
friend class ExecutionSubgraph;
};
// Returns an iterator over reachable blocks (filtered as we go). This is primarily for testing.
IterationRange<
FilterIterator<typename ArenaVector<HBasicBlock*>::const_iterator, ContainsFunctor>>
ReachableBlocks() const {
return Filter(MakeIterationRange(graph_->GetBlocks()), ContainsFunctor(this));
}
static bool CanAnalyse(HGraph* graph) {
// If there are any blocks with more than kMaxFilterableSuccessors we can't
// analyse the graph. We avoid this case to prevent excessive memory and
// time usage while allowing a simpler algorithm with a fixed-width
// branching factor.
return std::all_of(graph->GetBlocks().begin(), graph->GetBlocks().end(), [](HBasicBlock* blk) {
return blk == nullptr || blk->GetSuccessors().size() <= kMaxFilterableSuccessors;
});
}
private:
std::bitset<kMaxFilterableSuccessors> GetAllowedSuccessors(const HBasicBlock* blk) const {
DCHECK(valid_);
return allowed_successors_[blk->GetBlockId()];
}
void LimitBlockSuccessors(const HBasicBlock* block,
std::bitset<kMaxFilterableSuccessors> allowed) {
needs_prune_ = true;
allowed_successors_[block->GetBlockId()] &= allowed;
}
// Remove nodes which both precede and follow any exclusions. This ensures we don't need to deal
// with only conditionally materializing objects depending on whether we already materialized them.
// Ensure that for all blocks A, B, C: Unreachable(A) && Unreachable(C) && PathBetween(A, B) &&
// PathBetween(B, C) implies Unreachable(B). For example, given A -> B -> C with A and C removed,
// B is marked unreachable as well. This simplifies later transforms since it ensures
// that no execution can leave and then re-enter any exclusion.
void RemoveConcavity();
// Removes sink nodes. Sink nodes are nodes where there is no execution which
// avoids all removed nodes.
void Prune();
void RecalculateExcludedCohort();
HGraph* graph_;
ScopedArenaAllocator* allocator_;
// The map from block_id -> allowed-successors.
// This is the canonical representation of this subgraph. If a bit in the
// bitset is not set then the corresponding outgoing edge of that block is not
// considered traversable.
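// For illustration (a hypothetical block, not taken from any real graph): for a
// block with three successors, a stored value of 0b101 means only the edges to
// GetSuccessors()[0] and GetSuccessors()[2] may be traversed; the edge to
// GetSuccessors()[1] is treated as removed.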
ScopedArenaVector<std::bitset<kMaxFilterableSuccessors>> allowed_successors_;
// Helper that holds which blocks we are unable to reach. Only valid if
// 'needs_prune_ == false'.
ArenaBitVector unreachable_blocks_;
// A list of the excluded-cohorts of this subgraph. This is only valid when
// 'needs_prune_ == false'.
std::optional<ScopedArenaVector<ExcludedCohort>> excluded_list_;
// Bool holding whether there is at least one known path from the start block to
// the end in this graph. Used to short-circuit computation.
bool valid_;
// True if the subgraph has been modified since the last prune. While this is
// set, the cached reachability data is stale and must be recomputed (via Prune)
// before the subgraph can be queried.
bool needs_prune_;
// True if no more modification of the subgraph is permitted.
bool finalized_;
friend class ExecutionSubgraphTest;
friend class LoadStoreAnalysisTest;
DISALLOW_COPY_AND_ASSIGN(ExecutionSubgraph);
};
std::ostream& operator<<(std::ostream& os, const ExecutionSubgraph::ExcludedCohort& ex);
} // namespace art
#endif // ART_COMPILER_OPTIMIZING_EXECUTION_SUBGRAPH_H_