1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290
|
/*========================== begin_copyright_notice ============================
Copyright (C) 2017-2021 Intel Corporation
SPDX-License-Identifier: MIT
============================= end_copyright_notice ===========================*/
#include "Common_BinaryEncoding.h"
#include "BuildIR.h"
using namespace vISA;
// File-scope lookup tables with external linkage; every entry starts out as
// 128.
// NOTE(review): presumably these hold bit positions of instruction fields
// (source register file, flag register number) and 128 is a placeholder that
// platform-specific encoder setup overwrites -- confirm against the users of
// these tables.
unsigned long bitsSrcRegFile[4] = {128, 128, 128, 128};
unsigned long bits3SrcFlagRegNum[2] = {128, 128};
unsigned long bitsFlagRegNum[2] = {128, 128};
/// \brief Writes the encoded instructions to the file "<fileName>.dat".
///
/// Instructions are emitted in binInstList order. An instruction whose compact
/// control is set contributes BYTES_PER_INST / 2 bytes, every other
/// instruction contributes the full BYTES_PER_INST bytes.
///
/// \return FAILURE if the file cannot be opened or if any write (or the final
///         close/flush) fails; SUCCESS otherwise.
BinaryEncodingBase::Status BinaryEncodingBase::WriteToDatFile() {
  std::string binFileName = fileName + ".dat";
  std::string errStr;
  std::ofstream os(binFileName.c_str(), std::ios::binary);
  if (!os) {
    errStr = "Can't open " + binFileName + ".\n";
    vISA_ASSERT(false, errStr);
    return FAILURE;
  }

  for (BinInst *bin : binInstList) {
    const std::streamsize numBytes =
        GetCompactCtrl(bin) ? (BYTES_PER_INST / 2) : BYTES_PER_INST;
    os.write(reinterpret_cast<char *>(&(bin->Bytes)), numBytes);
  }

  os.close();
  // A failed write or a failed flush-on-close leaves the stream in a failed
  // state; report it instead of claiming the dump succeeded.
  if (!os) {
    return FAILURE;
  }
  return SUCCESS;
}
// Returns true when no instruction of `bb` has an associated binary
// instruction (getBinInst() yields a null/false value for all of them), which
// includes the case of a block without any instruction at all.
bool BinaryEncodingBase::isBBBinInstEmpty(G4_BB *bb) {
  for (G4_INST *candidate : *bb) {
    if (getBinInst(candidate)) {
      // Found at least one encoded instruction.
      return false;
    }
  }
  return true;
}
// Returns the first instruction of `bb` whose opcode is not G4_label.
// A block that contains only labels (or nothing) is treated as a programming
// error: the unreachable-assert fires and a null pointer is returned.
G4_INST *BinaryEncodingBase::getFirstNonLabelInst(G4_BB *bb) {
  for (G4_INST *candidate : *bb) {
    if (candidate->opcode() == G4_label) {
      continue; // skip labels
    }
    return candidate;
  }
  vISA_ASSERT_UNREACHABLE("can't get the inst number for this empty BB");
  return nullptr;
}
void BinaryEncodingBase::ProduceBinaryBuf(void *&handle) {
uint32_t binarySize = GetInstCounts() * (BYTES_PER_INST / 2);
handle = allocCodeBlock(binarySize);
char *buf = (char *)handle;
if (handle == NULL) {
vISA_ASSERT(false, "mem manager alloc failure in bin encoding");
} else {
for (unsigned i = 0, size = (unsigned)binInstList.size(); i < size; i++) {
BinInst *bin = binInstList[i];
char *ptr = (char *)(bin->Bytes);
if (GetCompactCtrl(bin)) {
memcpy_s(buf, binarySize, ptr, BYTES_PER_INST / 2);
buf += BYTES_PER_INST / 2;
} else {
memcpy_s(buf, binarySize, ptr, BYTES_PER_INST);
buf += BYTES_PER_INST;
}
}
}
}
// 3-src instructions (mad, lrp, bfe, bf2) must be in align16 mode for gen9 and
// earlier; this implies that all operands must be 16-byte aligned and exec size
// must be >=4. We convert a simd1 3-src instruction into simd4 (or simd2 for DF
// mad) and control the dst channel through the dst write mask. For DF mad, we
// also have to fix the source swizzle as .r is not supported for 64-bit types.
// This applies regardless of exec size.
// Additionally, invm and rsqrtm math instructions must also be align16.
//
// \param inst instruction to rewrite in place: its Align16 option is turned
//             on, and its dst write mask / bounds, source swizzles / regions,
//             exec size and predicate control are adjusted as described
//             below. Every source is expected to be a source region.
void BinaryEncodingBase::FixAlign16Inst(G4_INST *inst) {
inst->setOptionOn(InstOpt_Align16);
// convert dst to align16
// Start with all four channels enabled; the SIMD1/SIMD2 paths below narrow
// the mask to the channel(s) actually written.
G4_DstRegRegion *dst = inst->getDst();
setWriteMask(dst, ChannelEnable_XYZW);
// convert sources to align16
for (int k = 0, numSrc = inst->getNumSrc(); k < numSrc; k++) {
vISA_ASSERT(inst->getSrc(k)->isSrcRegRegion(),
"Unexpected src to be converted to ALIGN16!");
G4_SrcRegRegion *src = inst->getSrc(k)->asSrcRegRegion();
// Scalar sources use the .r swizzle, everything else the identity XYZW.
setSwizzle(src, src->isScalar() ? SrcSwizzle::R : SrcSwizzle::XYZW);
// For INVM / RSQRTM math the source region is additionally rewritten based
// on the source type: <2;2,1> for DF, <4;4,1> for F and HF. Any other type
// hits the "Not implemented" assert.
if (inst->opcode() == G4_math &&
(inst->asMathInst()->getMathCtrl() == MATH_INVM ||
inst->asMathInst()->getMathCtrl() == MATH_RSQRTM)) {
switch (inst->getSrc(k)->getType()) {
case Type_DF:
src->setRegion(*kernel.fg.builder,
kernel.fg.builder->createRegionDesc(2, 2, 1));
break;
case Type_F:
case Type_HF:
src->setRegion(*kernel.fg.builder,
kernel.fg.builder->createRegionDesc(4, 4, 1));
break;
default:
vISA_ASSERT_UNREACHABLE("Not implemented");
}
}
}
bool isDoubleInst = (dst->getType() == Type_DF);
if (inst->getExecSize() == g4::SIMD1) {
// Offset of the dst start within its 16-byte chunk.
int subRegOffset = dst->getLinearizedStart() % 16;
if (inst->getCondMod()) {
// A g/ge/l/le conditional modifier is only accepted when dst starts on a
// 16-byte boundary.
G4_CondModifier mod = inst->getCondMod()->getMod();
if (subRegOffset != 0 &&
(mod == Mod_g || mod == Mod_ge || mod == Mod_l || mod == Mod_le)) {
vISA_ASSERT(
false, "Invalid alignment for align16 inst of execsize 1 and offset %d", (short)subRegOffset);
}
}
// Pick the write mask from the 4-byte slot the dst starts in. For a DF dst
// slots 0 and 2 select a channel pair (XY / ZW); slots 1 and 3 always select
// a single channel regardless of the dst type.
ChannelEnable writeMask = NoChannelEnable;
switch (subRegOffset / 4) {
case 0:
writeMask = isDoubleInst ? ChannelEnable_XY : ChannelEnable_X;
break;
case 1:
writeMask = ChannelEnable_Y;
break;
case 2:
writeMask = isDoubleInst ? ChannelEnable_ZW : ChannelEnable_Z;
break;
case 3:
writeMask = ChannelEnable_W;
break;
default:
vISA_ASSERT_UNREACHABLE("unexpected subreg value");
}
setWriteMask(dst, writeMask);
// FIXME: this is incorrect as it could produce a negative left bound if
// subRegOffset is non-zero. HWConformity appears to force 16-byte alignment
// on dst, however, so we don't ever hit such case.
// Note the right bound is derived from the already-updated left bound.
// NOTE(review): if bounds are inclusive, "+ 16" covers 17 bytes -- confirm
// whether "+ 15" was intended.
dst->setLeftBound(dst->getLeftBound() - subRegOffset);
dst->setRightBound(dst->getLeftBound() + 16);
// Widen the instruction: SIMD2 for a DF dst, SIMD4 otherwise.
inst->setExecSize(isDoubleInst ? g4::SIMD2 : g4::SIMD4);
G4_Predicate *pred = inst->getPredicate();
if (pred) {
setAlign16PredCtrl(pred, PRED_ALIGN16_X);
}
} else if (inst->getExecSize() == g4::SIMD2 && !isDoubleInst) {
// Non-DF SIMD2: widened to SIMD4 with a two-channel write mask. Only 4-byte
// slots 0 (XY) and 2 (ZW) are accepted, and the instruction must not be
// predicated.
int subRegOffset = dst->getLinearizedStart() % 16;
ChannelEnable writeMask = NoChannelEnable;
switch (subRegOffset / 4) {
case 0:
writeMask = ChannelEnable_XY;
break;
case 2:
writeMask = ChannelEnable_ZW;
break;
default:
vISA_ASSERT_UNREACHABLE("dst must be 8 byte aligned");
}
setWriteMask(dst, writeMask);
dst->setLeftBound(dst->getLeftBound() - subRegOffset);
dst->setRightBound(dst->getLeftBound() + 16);
inst->setExecSize(g4::SIMD4);
vISA_ASSERT(!inst->getPredicate(), "do not support predicated SIMD2 mad");
}
// for double inst (DF dst), we have to additionally fix the source as it
// doesn't support the .r swizzle
// NOTE(review): the original comment said "double/half", but only the DF case
// is handled here.
if (isDoubleInst) {
for (int i = 0, numSrc = inst->getNumSrc(); i < numSrc; ++i) {
vISA_ASSERT(inst->getSrc(i)->isSrcRegRegion(),
"source must have a region");
G4_SrcRegRegion *src = inst->getSrc(i)->asSrcRegRegion();
const RegionDesc *rd = src->getRegion();
// Applies to scalar sources and to sources with region <2;2,0>: the swizzle
// is XYXY when the source starts at offset 0 of its 16-byte chunk and ZWZW
// when it starts at offset 8.
if (src->isScalar() ||
(rd->width == 2 && rd->horzStride == 0 && rd->vertStride == 2)) {
int subRegOffset = src->getLinearizedStart() % 16;
vISA_ASSERT(subRegOffset == 0 || subRegOffset == 8,
"double source must be 8 byte aligned");
setSwizzle(src,
subRegOffset == 0 ? SrcSwizzle::XYXY : SrcSwizzle::ZWZW);
// If subRegOffset is not 16-byte aligned, it will be fixed when
// encoding the source reg num.
}
}
}
}
// Adjusts the source regions of a math instruction before encoding.
//
// For an instruction with exec size greater than SIMD1, every source region
// described as <1;1,0> is replaced by <2;2,1>, which avoids a simulator
// warning. Sources that are absent or are not source regions are left alone.
//
// \param inst instruction to fix; must be a math instruction (asserted).
void BinaryEncodingBase::FixMathInst(G4_INST *inst) {
  vISA_ASSERT(inst->isMath(), "Expect math instruction");

  const int srcCount = inst->getNumSrc();
  for (int idx = 0; idx < srcCount; ++idx) {
    G4_Operand *opnd = inst->getSrc(idx);
    if (!opnd || !opnd->isSrcRegRegion()) {
      continue;
    }

    G4_SrcRegRegion *srcRgn = opnd->asSrcRegRegion();
    const RegionDesc *desc = srcRgn->getRegion();
    if (!(inst->getExecSize() > g4::SIMD1)) {
      continue;
    }

    const bool isOneOneZero =
        desc->vertStride == 1 && desc->width == 1 && desc->horzStride == 0;
    if (isOneOneZero) {
      // rewrite <1;1,0> to <2;2,1> to avoid simulator warning
      srcRgn->setRegion(*kernel.fg.builder,
                        kernel.fg.builder->createRegionDesc(2, 2, 1));
    }
  }
}
// Pre-encoding fix-up pass over every instruction of every basic block in the
// kernel's flow graph:
//  - intrinsic instructions are removed from their block (they are asserted to
//    be marked as lowered by the BinaryEncoding phase) and receive no further
//    processing;
//  - an instruction is rewritten by FixAlign16Inst when the platform has IEEE
//    div/sqrt and the instruction is a madm or an IEEE math instruction, or
//    when the platform lacks align1 ternary support and the instruction is a
//    non-send instruction with three sources;
//  - any other math instruction is rewritten by FixMathInst.
void BinaryEncodingBase::FixInst() {
  const bool align1Ternary = kernel.fg.builder->hasAlign1Ternary();
  const bool hasIEEEDivSqrt = kernel.fg.builder->hasIEEEDivSqrt();
  for (auto bb : kernel.fg) {
    for (auto iter = bb->begin(); iter != bb->end();) {
      G4_INST *inst = *iter;
      if (inst->isIntrinsic()) {
        // remove any intrinsics that should be lowered before binary encoding
        vISA_ASSERT(inst->asIntrinsicInst()->getLoweredByPhase() ==
                        Phase::BinaryEncoding,
                    "Unexpected intrinsics in binary encoding");
        iter = bb->erase(iter);
        // The instruction is no longer part of the block, so it must not go
        // through the align16/math fix-ups below (those expect a real,
        // encodable instruction with a dst and region sources).
        continue;
      }
      ++iter;

      bool isAlign16 =
          hasIEEEDivSqrt &&
          (inst->opcode() == G4_madm ||
           (inst->isMath() && inst->asMathInst()->isIEEEMath()));
      if (!isAlign16) {
        isAlign16 =
            (!align1Ternary) && (inst->getNumSrc() == 3) && !inst->isSend();
      }

      if (isAlign16) {
        FixAlign16Inst(inst);
      } else if (inst->isMath()) {
        FixMathInst(inst);
      }
    }
  }
}
// Produces the final binary for the kernel.
//
// \param binarySize [out] set to GetInstCounts() * (BYTES_PER_INST / 2).
// \return the handle filled in by ProduceBinaryBuf().
//
// When the vISA_GenerateBinary option is set, the binary is also dumped via
// WriteToDatFile(); its status is not inspected. Whoever invokes the vISA
// builder is expected to know whether a binary file should be generated; the
// option should not be set in dll mode.
void *BinaryEncodingBase::EmitBinary(uint32_t &binarySize) {
  // CommitRelativeAddresses();
  binarySize = GetInstCounts() * (BYTES_PER_INST / 2);

  const bool dumpDatFile = kernel.getOption(vISA_GenerateBinary);
  if (dumpDatFile) {
    WriteToDatFile();
  }

  void *codeBlock = nullptr;
  ProduceBinaryBuf(codeBlock);
  return codeBlock;
}
void BinaryEncodingBase::computeBinaryOffsets() {
// Compute offset for gen instructions
uint64_t offset = 0;
for (auto I = binInstList.begin(), E = binInstList.end(); I != E; ++I) {
BinInst *binInst = *I;
std::streamsize size =
GetCompactCtrl(binInst) ? (BYTES_PER_INST / 2) : BYTES_PER_INST;
binInst->SetGenOffset(offset);
offset += size;
}
}
// Reports the value of the kernel's vISA_Compaction option.
bool BinaryEncodingBase::doCompaction() const {
  const bool compactionRequested = kernel.getOption(vISA_Compaction);
  return compactionRequested;
}
|