1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363
|
#include "stdafx.h"
#include "WindowsLayout.h"
#include "Code/Layout.h"
namespace code {
namespace x64 {
WindowsLayout::WindowsLayout(const Arena *arena) : Layout(arena) {}
Offset WindowsLayout::resultParam() {
Result r = params->result();
if (same(r.memoryRegister(), ptrC)) {
// Shadow space for rcx.
return Offset::sPtr * 2;
} else if (same(r.memoryRegister(), ptrD)) {
// Shadow space for rcx.
return Offset::sPtr * 3;
} else {
dbg_assert(false, L"Should not need resultparam now.");
return Offset();
}
}
void WindowsLayout::saveResult(Listing *dest) {
const Result &result = params->result();
// We have reserved space to store these right above the shadow space!
if (result.memoryRegister() != noReg) {
*dest << mov(ptrRel(ptrStack, Offset::sPtr * 4), ptrA);
} else {
for (Nat i = 0; i < result.registerCount(); i++) {
*dest << mov(ptrRel(ptrStack, Offset::sPtr * Int(i + 4)),
asSize(result.registerAt(i), Size::sPtr));
}
}
}
void WindowsLayout::restoreResult(Listing *dest) {
const Result &result = params->result();
// We have reserved space to store these right above the shadow space!
if (result.memoryRegister() != noReg) {
*dest << mov(ptrA, ptrRel(ptrStack, Offset::sPtr * 4));
} else {
for (Nat i = 0; i < result.registerCount(); i++) {
*dest << mov(asSize(result.registerAt(i), Size::sPtr),
ptrRel(ptrStack, Offset::sPtr * Int(i + 4)));
}
}
}
static bool spillShadow(const Param &p, const Offset &o) {
(void)p;
// Shadow space is at positive offsets from the frame pointer:
return o.v64() > 0;
}
static bool spillRemaining(const Param &p, const Offset &o) {
return !spillShadow(p, o);
}
void WindowsLayout::emitProlog(Listing *dest) {
// Set up stack frame:
*dest << push(ptrFrame);
*dest << preserve(ptrFrame);
*dest << mov(ptrFrame, ptrStack);
// Spill parameters to shadow space as needed. We want to do this early so that we can
// start handling cleanup during exceptions early.
spillParams(dest, &spillShadow);
// Push all registers we need to preserve. We do it in reverse here to make the epilog
// cheaper (only one prolog in functions, but many epilogs).
const Nat maxToStore = 32; // Overkill for X64.
Nat toStoreCount = 0;
Reg toStore[maxToStore];
for (RegSet::Iter i = toPreserve->begin(); i != toPreserve->end(); ++i) {
assert(toStoreCount + 1 < maxToStore);
toStore[toStoreCount++] = i.v();
}
for (Nat i = toStoreCount; i > 0; i--) {
Reg r = asSize(toStore[i - 1], Size::sPtr);
*dest << push(r);
*dest << preserve(r);
}
// Allocate the stack frame. We use a second form of 'prolog' for that to emit the
// proper metadata. We also mark this as the end of the prolog, and from here exceptions
// will start being handled.
Offset stackSize = layout->last() - Size::sPtr*toPreserve->count();
*dest << prolog(engine())->alterSrc(ptrConst(stackSize));
// Spill remaining parameters.
spillParams(dest, &spillRemaining);
}
void WindowsLayout::emitEpilog(Listing *dest) {
// If the last instruction was a call instruction, we need to insert a nop here.
// Otherwise, the exception handling code thinks that we would return to the function
// epilog and will not call our unwind handler. This is fairly rare, since in most cases
// we would have emitted code to destroy variables right before here, but if we have
// variables that are only destroyed when exceptions are thrown, we would miss them if
// we don't have a nop here.
padCallWithNop(dest);
// Note: The instructions in the epilog are quite harshly specified in the ABI. As such,
// we can not use the LEAVE instruction, or load rsp from rbp to avoid accidental
// mis-alignment of the stack.
// Restore stack pointer:
Offset stackSize = layout->last() - Size::sPtr*toPreserve->count();
if (stackSize.v64() != 0)
*dest << add(ptrStack, ptrConst(stackSize));
// Restore registers:
for (RegSet::Iter i = toPreserve->begin(); i != toPreserve->end(); ++i)
*dest << pop(asSize(i.v(), Size::sPtr));
*dest << pop(ptrFrame);
}
static Nat useShadowSpace(Reg reg) {
// Note: Both rcx and xmm0 can not be used simultaneously on Windows, so we can always
// spill like this.
Reg shadowMap1[] = { rcx, rdx, r8, r9 };
Reg shadowMap2[] = { xmm0, xmm1, xmm2, xmm3 };
for (Nat i = 0; i < ARRAY_COUNT(shadowMap1); i++) {
if (same(reg, shadowMap1[i]))
return 2 + i;
if (same(reg, shadowMap2[i]))
return 2 + i;
}
return 0;
}
static Size spillParams(Array<Offset> *out, Listing *l, Params *params, Offset varOffset) {
// NOTE: We could avoid spilling primitives to memory, as we do not generally use those
// registers from code generated for the generic platform. However, we would need to
// save as soon as we perform a function call anyway, which is probably more expensive
// than just spilling them unconditionally in the function prolog.
Array<Var> *all = l->allParams();
{
// Start by computing the locations of parameters passed on the stack.
Offset stackOffset = Offset::sPtr * 2; // return addr, rbp
for (Nat i = 0; i < params->stackCount(); i++) {
Nat paramId = params->stackParam(i).id(); // Should never be 'Param::returnId'
Var var = all->at(paramId);
out->at(var.key()) = stackOffset + Offset(params->stackOffset(i));
}
}
Size used;
// Then, compute where to spill the remaining registers. Keep track if they are passed
// in memory or not, since that affects their size!
for (Nat i = 0; i < params->registerCount(); i++) {
Param p = params->registerParam(i);
if (p.empty())
continue;
if (p.id() == Param::returnId())
continue;
Var var = all->at(p.id());
Offset &to = out->at(var.key());
// Already computed -> no need to spill.
if (to != Offset())
continue;
// If it is an integer register, use its shadow space.
if (Nat shadowIndex = useShadowSpace(params->registerSrc(i))) {
to = Offset::sPtr * shadowIndex;
continue;
}
// Try to squeeze the preserved registers as tightly as possible while keeping alignment.
// This is more concentrated than when pushing registers to the stack. On the stack, all
// parameters are aligned to 8 bytes, while we do not need that here. We only need to keep
// the natural alignment of the parameters to keep the CPU happy.
Size sz = p.size();
if (l->freeOpt(var) & freeIndirection)
sz = Size::sPtr;
// However, since we don't want to use multiple instructions to spill a single
// parameter, we increase the size of some parameters a bit.
if (asSize(ptrA, sz) == noReg)
sz = sz + Size::sInt.alignment();
used = (used + sz).alignedAs(sz);
to = -(varOffset + used);
}
return used.aligned();
}
enum EnsureFlags {
// Assume we are only calling destructors. I.e., if no destructors need to be executed,
// then we won't modify the shadow space at all.
ensureOnlyDtors = 0x0,
// Assume that we will need the extra space always, but it is fine to clobber any
// variables that don't need their destructors to be called.
ensureAllowClobber = 0x1,
// Assume that we will need the extra space, but disallow clobbering any active
// variables.
ensureNoClobber = 0x2,
};
bool shouldPrint = false;
// Check so that enough shadow space is available to call destructors for all variables that
// need destruction. If 'force' is true, then all variables are checked instead.
static void ensureShadowSpace(Listing *l, Array<Offset> *offset, Array<Var> *vars, Int size, EnsureFlags flags) {
for (Nat i = 0; i < vars->count(); i++) {
Var v = vars->at(i);
if ((flags >= ensureNoClobber) || (l->freeOpt(v) & freeOnBlockExit)) {
Int off = offset->at(v.key()).v64();
// Clamp to zero: we need to call something. Even if the variable is located
// above return addr, make sure to not clobber return addr and stored ebp!
if (off > 0)
off = 0;
// Note: offset->last() is the total size of the stack.
if (-off + size > offset->last().v64())
offset->last() = Offset(-off + size);
}
}
// If we always execute something, make sure we don't accidentally clobber rbp and
// return address.
if (flags >= ensureAllowClobber)
if (size > offset->last().v64())
offset->last() = Offset(size);
}
// Check so that enough shadow space is available in the following situations:
//
// 1. When destroying a block, we must be able to call destructors for the
// object with the lowest address.
// 2. When copying the result (fnRet or fnRetRef), we might need to call a
// copy-ctor. Furthermore, we might also need to temporarily store a word
// somewhere (also during 'epilog').
static void ensureShadowSpace(Listing *l, Params *params, Array<Offset> *offset, Array<Var> *allVars) {
const Int shadowSz = 0x20;
// Offset before = offset->last();
// For #1 above, we can just iterate through all variables and make sure that the total
// stack size is 0x20 larger than the object, for all objects that have destructors.
ensureShadowSpace(l, offset, allVars, shadowSz, ensureOnlyDtors);
// For #2, we need to investigate the listing, to see which blocks are active when we
// need additional stack space. If we would do it for all blocks, we would waste 32
// bytes in all stack frames.
// Note: We don't bother with sanity checks here. They are done when the transform
// is executed later on anyway.
// Extra space for storing the result through destructors:
Int extraSpace = shadowSz;
if (params->result().memoryRegister())
extraSpace += sizeof(void *); // We need to store rax during destructors
else
extraSpace += params->result().registerCount() * sizeof(void *);
Block current;
for (Nat i = 0; i < l->count(); i++) {
Instr *instr = l->at(i);
switch (instr->op()) {
case op::prolog:
current = l->root();
break;
case op::beginBlock:
current = instr->src().block();
if (instr->dest() == Operand()) {
// No temporary register specified! Make sure we have space, and tell it to
// spill to the stack.
ensureShadowSpace(l, offset, l->allVars(current), 0x8, ensureNoClobber);
l->setInstr(i, instr->alterDest(ptrRel(ptrStack, Offset())));
}
break;
case op::endBlock:
// Note: destroys the block, but we care about that in case #1 above, so we
// don't need to do anything more here. We only need to keep track of the
// current block.
current = l->parent(instr->src().block());
break;
case op::jmpBlock:
// Note: Calls dtors. We take care of that in case #1 above, so we don't need to
// do anything more here.
break;
case op::fnRet:
case op::fnRetRef:
// This is similar to the code for 'epilog'. The difference is that we consider
// *all* variables here if we need to call a copy constructor.
// Note: This is only necessary when we call a copy ctor, we are fine
// in cases where we return in memory but can use memcpy. Thus, the following
// is too conservative:
// if (params->result().memoryRegister() != noReg)
if (as<ComplexDesc>(l->result)) {
// TODO: Most likely, we can give this function 'ensureAllowClobber'
// instead. It is generally fine if we clobber variables without destructors
// (the complex type we have here most likely has a destructor).
ensureShadowSpace(l, offset, l->allVars(current), shadowSz, ensureNoClobber);
}
// Fall through
case op::epilog:
// Ensure that all variables with dtors have shadow space, as well as one extra
// word to store the result.
for (Block b = current; b != Block(); b = l->parent(b)) {
ensureShadowSpace(l, offset, l->allVars(b), extraSpace, ensureOnlyDtors);
}
break;
}
}
}
Array<Offset> *WindowsLayout::computeLayout(Listing *l, Params *params, Nat spilled) {
Array<Offset> *result = code::layout(l);
// Saved registers and other things:
Offset varOffset = Offset::sPtr * spilled;
// Figure out which parameters we need to spill into memory.
varOffset += code::x64::spillParams(result, l, params, varOffset);
varOffset = varOffset.alignAs(Size::sPtr);
// Update the layout of the other variables according to the size we needed for
// parameters and spilled registers.
Array<Var> *all = l->allVars();
for (Nat i = 0; i < all->count(); i++) {
Var var = all->at(i);
Nat id = var.key();
if (!l->isParam(var)) {
Size s = var.size();
if (l->freeOpt(var) & freeIndirection)
s = Size::sPtr;
result->at(id) = -(result->at(id) + s.aligned() + varOffset);
}
}
result->last() += varOffset;
ensureShadowSpace(l, params, result, all);
if (result->last().v64() & 0xF) {
// Need to be aligned to 64 bits. Otherwise, the SIMD operations used widely on
// X86-64 will not work properly.
result->last() = Offset(roundUp(result->last().v64(), 0x10));
}
// for (Nat i = 0; i < result->count(); i++)
// PLN(i << L": " << result->at(i));
return result;
}
}
}
|