1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239
|
/*
* Copyright (C) 2015-2017 Apple Inc. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "config.h"
#include "AirFixPartialRegisterStalls.h"
#if ENABLE(B3_JIT)
#include "AirBasicBlock.h"
#include "AirCode.h"
#include "AirInsertionSet.h"
#include "AirInst.h"
#include "AirInstInlines.h"
#include "AirPhaseScope.h"
#include "MacroAssembler.h"
#include <wtf/IndexMap.h>
#include <wtf/IndexSet.h>
#include <wtf/Vector.h>
namespace JSC { namespace B3 { namespace Air {
namespace {
// Tells whether `inst` belongs to the set of opcodes this phase treats as performing a
// partial update of their destination XMM register: the scalar int/float/double
// conversions, square roots, and ceil/floor roundings listed below.
// Any other opcode yields false.
bool hasPartialXmmRegUpdate(const Inst& inst)
{
    switch (inst.kind.opcode) {
    // Conversions between float, double and integer.
    case ConvertDoubleToFloat:
    case ConvertFloatToDouble:
    case ConvertInt32ToDouble:
    case ConvertInt64ToDouble:
    case ConvertInt32ToFloat:
    case ConvertInt64ToFloat:
    // Square roots.
    case SqrtDouble:
    case SqrtFloat:
    // Roundings.
    case CeilDouble:
    case CeilFloat:
    case FloorDouble:
    case FloorFloat:
        return true;
    default:
        return false;
    }
}
// Tells whether `inst` is a MoveZeroToDouble, the only opcode this phase regards as
// breaking the dependency on a register's previous value.
bool isDependencyBreaking(const Inst& inst)
{
    switch (inst.kind.opcode) {
    case MoveZeroToDouble:
        // "xorps reg, reg" is used by the frontend to remove the dependency on its argument.
        return true;
    default:
        return false;
    }
}
// Threshold, counted in instructions, used by this phase: a recorded definition distance
// strictly below this value is considered too close.
// FIXME: find a good distance per architecture experimentally.
// LLVM uses a distance of 16 but that comes from Nehalem.
// Declared constexpr because nothing ever writes to it; this makes it a true compile-time
// constant instead of a mutable namespace-scope variable.
constexpr unsigned char minimumSafeDistance = 16;
struct FPDefDistance {
FPDefDistance()
{
for (unsigned i = 0; i < MacroAssembler::numberOfFPRegisters(); ++i)
distance[i] = 255;
}
void reset(FPRReg reg)
{
unsigned index = MacroAssembler::fpRegisterIndex(reg);
distance[index] = 255;
}
void add(FPRReg reg, unsigned registerDistance)
{
unsigned index = MacroAssembler::fpRegisterIndex(reg);
if (registerDistance < distance[index])
distance[index] = static_cast<unsigned char>(registerDistance);
}
bool updateFromPrecessor(FPDefDistance& precessorDistance, unsigned constantOffset = 0)
{
bool changed = false;
for (unsigned i = 0; i < MacroAssembler::numberOfFPRegisters(); ++i) {
unsigned regDistance = precessorDistance.distance[i] + constantOffset;
if (regDistance < minimumSafeDistance && regDistance < distance[i]) {
distance[i] = regDistance;
changed = true;
}
}
return changed;
}
unsigned char distance[MacroAssembler::numberOfFPRegisters()];
};
// Accounts for one instruction while walking a block front to back.
//   inst               - the instruction being stepped over.
//   localDistance      - distance table to update.
//   distanceToBlockEnd - running counter, decremented once per call before it is used.
// A dependency-breaking instruction clears the entry of the register in its first
// argument. Any other instruction records the current counter value for each FP register
// it defines.
void updateDistances(Inst& inst, FPDefDistance& localDistance, unsigned& distanceToBlockEnd)
{
    --distanceToBlockEnd;

    if (!isDependencyBreaking(inst)) {
        inst.forEachTmp([&] (Tmp& tmp, Arg::Role role, Bank, Width) {
            ASSERT_WITH_MESSAGE(tmp.isReg(), "This phase must be run after register allocation.");
            // Only definitions of FP registers are tracked.
            if (!tmp.isFPR() || !Arg::isAnyDef(role))
                return;
            localDistance.add(tmp.fpr(), distanceToBlockEnd);
        });
        return;
    }

    localDistance.reset(inst.args[0].tmp().fpr());
}
}
// Air phase entry point. Does nothing unless isX86() holds. Otherwise, in front of each
// instruction flagged by hasPartialXmmRegUpdate(), it inserts a MoveZeroToDouble for every
// FP register that the instruction defines without using and whose recorded definition
// distance is below minimumSafeDistance.
void fixPartialRegisterStalls(Code& code)
{
    if (!isX86())
        return;

    PhaseScope phaseScope(code, "fixPartialRegisterStalls"_s);

    // Keep only the blocks holding at least one partial-update instruction.
    Vector<BasicBlock*> candidates;
    for (BasicBlock* block : code) {
        bool containsPartialUpdate = false;
        for (const Inst& inst : *block) {
            if (hasPartialXmmRegUpdate(inst)) {
                containsPartialUpdate = true;
                break;
            }
        }
        if (containsPartialUpdate)
            candidates.append(block);
    }

    // Such instructions are rare, so most of the time there is nothing left to do.
    if (candidates.isEmpty())
        return;

    // Per block: distance to the last instruction setting each register, as seen on
    // block *entry*.
    IndexMap<BasicBlock*, FPDefDistance> lastDefDistance(code.size());

    // Blocks whose entry distances changed and still have to be pushed to their successors.
    IndexSet<BasicBlock*> dirty;

    // Step 1: compute each block's own distances and hand them to its successors.
    for (BasicBlock* block : code) {
        FPDefDistance blockLocalDistance;
        unsigned remainingInstructions = block->size();
        for (Inst& inst : *block)
            updateDistances(inst, blockLocalDistance, remainingInstructions);

        for (BasicBlock* successor : block->successorBlocks()) {
            const bool lowered = lastDefDistance[successor].updateFromPrecessor(blockLocalDistance);
            if (lowered)
                dirty.add(successor);
        }
    }

    // Step 2: propagate the minimums across blocks until a full sweep lowers nothing.
    for (bool changed = true; changed;) {
        changed = false;
        for (BasicBlock* block : code) {
            if (!dirty.remove(block))
                continue;

            // Shortcut: a block at least minimumSafeDistance long cannot pass on any
            // distance that would still be below the threshold.
            const unsigned blockSize = block->size();
            if (blockSize >= minimumSafeDistance)
                continue;

            FPDefDistance& entryDistance = lastDefDistance[block];
            for (BasicBlock* successor : block->successorBlocks()) {
                if (!lastDefDistance[successor].updateFromPrecessor(entryDistance, blockSize))
                    continue;
                dirty.add(successor);
                changed = true;
            }
        }
    }

    // Step 3: walk every candidate block and queue the dependency-breaking instructions.
    InsertionSet insertionSet(code);
    for (BasicBlock* block : candidates) {
        unsigned distanceToBlockEnd = block->size();
        FPDefDistance& runningDistance = lastDefDistance[block];

        for (unsigned instIndex = 0; instIndex < block->size(); ++instIndex) {
            Inst& inst = block->at(instIndex);

            if (hasPartialXmmRegUpdate(inst)) {
                RegisterSetBuilder defs;
                RegisterSetBuilder uses;
                inst.forEachTmp([&] (Tmp& tmp, Arg::Role role, Bank, Width width) {
                    const bool isTrackedRegister = tmp.isFPR() && width <= Width64;
                    if (!isTrackedRegister)
                        return;
                    if (Arg::isAnyDef(role))
                        defs.add(tmp.fpr(), IgnoreVectors);
                    if (Arg::isAnyUse(role))
                        uses.add(tmp.fpr(), IgnoreVectors);
                });

                // Only registers that are defined but not used matter: when a register is
                // also used, the instruction has to wait for its value anyway.
                defs.exclude(uses);

                defs.buildWithLowerBits().forEach([&] (Reg reg) {
                    const unsigned regIndex = MacroAssembler::fpRegisterIndex(reg.fpr());
                    if (runningDistance.distance[regIndex] < minimumSafeDistance)
                        insertionSet.insert(instIndex, MoveZeroToDouble, inst.origin, Tmp(reg));
                });
            }

            // The distances are advanced only after the instruction has been examined.
            updateDistances(inst, runningDistance, distanceToBlockEnd);
        }

        insertionSet.execute(block);
    }
}
} } } // namespace JSC::B3::Air
#endif // ENABLE(B3_JIT)
|