1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464
|
/*========================== begin_copyright_notice ============================
Copyright (C) 2017-2024 Intel Corporation
SPDX-License-Identifier: MIT
============================= end_copyright_notice ===========================*/
//
/// GenXSubtarget : subtarget information
/// -------------------------------------
///
/// GenXSubtarget is the GenX-specific subclass of TargetSubtargetInfo. It takes
/// features detected by the front end (what the Gen architecture is),
/// and exposes flags to the rest of the GenX backend for
/// various features (e.g. whether 64 bit operations are supported).
///
/// Where subtarget features are used is noted in the documentation of GenX
/// backend passes.
///
/// The flags exposed to the rest of the GenX backend are as follows. Most of
/// these are currently not used.
///
//===----------------------------------------------------------------------===//
#ifndef GENXSUBTARGET_H
#define GENXSUBTARGET_H
#include "GenX.h"
#include "visa_igc_common_header.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/ADT/Triple.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/Pass.h"
#include <string>
#define GET_SUBTARGETINFO_HEADER
#define GET_SUBTARGETINFO_ENUM
#include "GenXGenSubtargetInfo.inc"
namespace llvm {
// Forward declarations of LLVM types, so that they can be named in this
// header without requiring their full definitions here. In the part of the
// header visible below, only StringRef is actually referenced (by the
// feature-parsing member function declarations of GenXSubtarget).
class GlobalValue;
class Instruction;
class StringRef;
class TargetMachine;
/// GenX-specific subtarget description.
///
/// The class stores the target triple, the selected target id and a set of
/// feature flags / size parameters, and exposes them to the rest of the GenX
/// backend through const getters. Member functions that are only declared
/// here are defined outside of this header (ParseSubtargetFeatures is
/// generated by tblgen).
class GenXSubtarget final : public GenXGenSubtargetInfo {
public:
  /// Identifiers of the targets known to the backend. Invalid is the last
  /// enumerator and denotes "no valid target".
  enum GenXTargetId {
    Gen8,
    Gen9,
    Gen9LP,
    Gen11,
    XeLP,
    XeHP,
    XeHPG,
    XeLPG,
    XeLPGPlus,
    XeHPC,
    XeHPCVG,
    Xe2,
    Invalid,
  };

protected:
  // TargetTriple - What processor and OS we're targeting.
  Triple TargetTriple;

  // TargetId - Which target this subtarget describes. Defaults to Invalid,
  // like every other member here has an in-class default, so that the value
  // is well defined even before it is assigned.
  GenXTargetId TargetId = Invalid;

private:
  // HasLongLong - True if subtarget supports long long type
  bool HasLongLong = false;

  // HasFP64 - True if subtarget supports double type
  bool HasFP64 = false;

  // HasIEEEDivSqrt - True if subtarget supports IEEE-754 div and sqrt
  bool HasIEEEDivSqrt = false;

  // FDivFSqrt64Emu - True if subtarget requires partial fp64 emulation
  bool FDivFSqrt64Emu = false;

  // DisableJmpi - True if jmpi is disabled.
  bool DisableJmpi = false;

  // DisableVectorDecomposition - True if vector decomposition is disabled.
  bool DisableVectorDecomposition = false;

  // DisableJumpTables - True if switch to jump tables lowering is disabled.
  bool DisableJumpTables = false;

  // Only generate warning when callable is used in the middle of the kernel
  bool WarnCallable = false;

  // Size of one general register in bytes.
  unsigned GRFByteSize = 32;

  // Maximum width of LSC messages.
  unsigned LSCMaxWidth = 16;

  // Number of threads per EU (0 by default).
  unsigned NumThreadsPerEU = 0;

  // True if legacy data-port messages are disabled. Combined with the LSC
  // feature flags in translateLegacyMessages() and
  // translateMediaBlockMessages().
  bool TranslateLegacyMessages = false;

  // Currently used for PVC B-stepping (some i64 operations are unsupported)
  bool PartialI64Emulation = false;

  // True if there is no legacy dataport shared function.
  bool NoLegacyDataport = false;

  // Some targets do not support i64 ops natively, we have an option to emulate
  bool EmulateLongLong = false;

  // True if target supports native 64-bit add
  bool HasAdd64 = false;

  // True if it is profitable to use native DxD->Q multiplication
  bool UseMulDDQ = false;

  // True if codegenerating for OCL runtime (set by default since CMRT removed)
  bool OCLRuntime = true;

  // True if subtarget supports switchjmp visa instruction
  bool HasSwitchjmp = false;

  // True if subtarget supports preemption
  bool HasPreemption = false;

  // True if subtarget requires WA for nomask instructions under divergent
  // control flow
  bool WaNoMaskFusedEU = false;

  // True if subtarget has fused EUs
  bool HasFusedEU = false;

  // True if subtarget supports 32-bit integer division
  bool HasIntDivRem32 = false;

  // True if subtarget supports 32-bit rol/ror instructions
  bool HasBitRotate = false;

  // True if subtarget supports 64-bit rol/ror instructions
  bool Has64BitRotate = false;

  // True if subtarget gets HWTID from predefined variable
  bool GetsHWTIDFromPredef = false;

  // True if thread payload should be loaded from memory.
  bool HasThreadPayloadInMemory = false;

  // Has L1 read-only cache.
  bool HasL1ReadOnlyCache = false;

  // Suppress local memory fence. (The identifier keeps its historical
  // spelling because it is part of the existing interface.)
  bool HasLocalMemFenceSupress = false;

  /// Packed float immediate vector operands are supported.
  bool HasPackedFloat = false;

  /// True if subtarget supports LSC messages
  bool HasLSCMessages = false;

  /// True if subtarget supports typed LSC messages
  bool HasLSCTypedMessages = false;

  /// True if subtarget supports constant offset for LSC message address
  bool HasLSCOffset = false;

  /// True if subtarget supports half SIMD LSC messages
  bool HasHalfSIMDLSC = false;

  /// True if subtarget supports sampler messages
  bool HasSampler = false;

  /// Has multi-tile.
  bool HasMultiTile = false;

  /// Has L3 cache-coherent cross tiles.
  bool HasL3CacheCoherentCrossTiles = false;

  /// Has L3 flush on GPU-scope invalidate.
  bool HasL3FlushOnGPUScopeInvalidate = false;

  /// Has denormal control for BF16 and TF32 types on DPAS
  bool HasSystolicDenormControl = false;

  /// True if Vx1 and VxH indirect addressing are allowed for Byte datatypes
  bool HasMultiIndirectByteRegioning = false;

  /// True if subtarget supports ADD3 instruction
  bool HasAdd3 = false;

  /// True if subtarget supports BFN instruction
  bool HasBfn = false;

  /// True if subtarget supports SAD and SADA2 instructions
  bool HasSad2 = false;

  /// True if subtarget supports OWord SLM read/write messages
  bool HasSLMOWord = false;

  /// True if subtarget supports SIMD32 MAD instruction
  bool HasMadSimd32 = false;

  /// True if subtarget requires A32 byte scatter emulation
  bool HasWaNoA32ByteScatter = false;

  /// True if subtarget supports indirect cross-grf access
  bool HasIndirectGRFCrossing = false;

  /// True if subtarget supports indirect cross-grf byte access
  bool HasIndirectByteGRFCrossing = false;

  /// True if subtarget supports named barriers
  bool HasNamedBarriers = false;

  /// True if subtarget supports media walker
  bool HasMediaWalker = false;

  /// True if subtarget supports large GRF mode
  bool HasLargeGRF = false;

  // True if target supports local integer compare exchange 64-bit
  bool HasLocalIntegerCas64 = false;

  // True if target supports global double precision atomic add/sub
  bool HasGlobalAtomicAddF64 = false;

  /// Max supported SLM size (in kbytes)
  int MaxSLMSize = 64;

  // Number of elements in Address Register
  unsigned AddressRegisterElements = 16;

  // Shows which surface should we use for stack.
  // NOTE(review): unlike the other members this one has no in-class default
  // (no suitable "unset" value is visible from this header); presumably it is
  // assigned during construction - confirm against the constructor.
  PreDefined_Surface StackSurf;

public:
  // This constructor initializes the data members to match that
  // of the specified triple.
  //
  GenXSubtarget(const Triple &TT, const std::string &CPU,
                const std::string &FS);

  /// Returns the id of the target this subtarget describes.
  GenXTargetId getTargetId() const { return TargetId; }

  // GRF size in bytes.
  unsigned getGRFByteSize() const { return GRFByteSize; }

  /// Returns the stored number of threads per EU.
  unsigned getNumThreadsPerEU() const { return NumThreadsPerEU; }

  // LSC instructions can operate either in full SIMD mode or
  // in half SIMD mode. This defines how many registers are
  // used by the data payload.
  // getLSCMinWidth() returns half of the maximum SIMD width.
  // getLSCMaxWidth() returns the maximum SIMD width.
  // Instructions narrower than getLSCMinWidth() still use
  // the same amount of registers for their data payload
  // as if they were getLSCMinWidth() wide.
  unsigned getLSCMinWidth() const { return getLSCMaxWidth() / 2; }
  unsigned getLSCMaxWidth() const { return LSCMaxWidth; }

  // The maximum amount of registers that an LSC message's data payload
  // can take up.
  unsigned getLSCMaxDataRegisters() const { return 8; }

  // Sampler widths: the minimum is the number of dwords that fit into one
  // GRF, the maximum is twice that.
  unsigned getSamplerMinWidth() const { return GRFByteSize / genx::DWordBytes; }
  unsigned getSamplerMaxWidth() const { return 2 * getSamplerMinWidth(); }

  // ParseSubtargetFeatures - Parses features string setting specified
  // subtarget options. Definition of function is auto generated by tblgen.
  void ParseSubtargetFeatures(StringRef CPU, StringRef TuneCPU, StringRef FS);

  // \brief Initialize the features for the GenX target.
  void initSubtargetFeatures(StringRef CPU, StringRef FS);

  // Declared here, defined elsewhere. Judging by the name it reports whether
  // the internal intrinsic with the given ID is supported by this subtarget.
  bool isInternalIntrinsicSupported(unsigned ID) const;

  /// * translateMediaWalker - true if translate media walker APIs
  /// (i.e. when the subtarget does not support media walker)
  bool translateMediaWalker() const { return !HasMediaWalker; }

  // TODO: consider implementing 2 different getters
  /// * has add3 and bfn instructions
  bool hasAdd3Bfn() const { return HasAdd3 && HasBfn; }

  /// * dpasWidth - GRF size in bytes divided by 4
  int dpasWidth() const { return GRFByteSize / 4; }

  /// * getNumElementsInAddrReg - GRF size in bytes divided by 4
  // NOTE(review): this value is derived from the GRF size and is independent
  // of AddressRegisterElements / getAddressRegisterElements(); confirm which
  // of the two a given caller needs.
  int getNumElementsInAddrReg() const { return GRFByteSize / 4; }

  /// * hasLSCMessages - true if LSC messages are supported
  bool hasLSCMessages() const { return HasLSCMessages; }

  /// * hasLSCTypedMessages - true if typed LSC messages are supported
  bool hasLSCTypedMessages() const { return HasLSCTypedMessages; }

  /// * hasLSCOffset - true if constant offset for LSC message address is
  /// supported
  bool hasLSCOffset() const { return HasLSCOffset; }

  /// * translateLegacyMessages - true if LSC messages are supported and the
  /// TranslateLegacyMessages flag is set
  bool translateLegacyMessages() const {
    return HasLSCMessages && TranslateLegacyMessages;
  }

  /// * translateMediaBlockMessages - true if typed LSC messages are supported
  /// and the TranslateLegacyMessages flag is set
  bool translateMediaBlockMessages() const {
    return HasLSCTypedMessages && TranslateLegacyMessages;
  }

  /// * hasHalfSIMDLSC - true if half SIMD LSC messages are supported
  bool hasHalfSIMDLSC() const { return HasHalfSIMDLSC; }

  /// * partialI64Emulation - true if partial i64 emulation is enabled
  bool partialI64Emulation() const { return PartialI64Emulation; }

  /// * noLegacyDataport - true if there is no legacy dataport shared function
  bool noLegacyDataport() const { return NoLegacyDataport; }

  /// * hasSampler - true if sampler messages are supported
  bool hasSampler() const { return HasSampler; }

  /// * hasPackedFloat - true if packed float immediate vector operands are
  /// supported
  bool hasPackedFloat() const { return HasPackedFloat; }

  /// * emulateLongLong - true if i64 emulation is requested
  bool emulateLongLong() const { return EmulateLongLong; }

  /// * hasLongLong - true if target supports long long
  bool hasLongLong() const { return HasLongLong; }

  /// * hasFP64 - true if target supports double fp
  bool hasFP64() const { return HasFP64; }

  /// * hasIEEEDivSqrt - true if target supports IEEE-754 div and sqrt
  bool hasIEEEDivSqrt() const { return HasIEEEDivSqrt; }

  /// * emulateFDivFSqrt64 - true if target requires partial fp64 emulation
  bool emulateFDivFSqrt64() const { return FDivFSqrt64Emu; }

  /// * hasAdd64 - true if target supports native 64-bit add/sub
  bool hasAdd64() const { return HasAdd64; }

  /// * useMulDDQ - true if is desired to emit DxD->Q mul instruction
  bool useMulDDQ() const { return UseMulDDQ; }

  /// * disableJmpi - true if jmpi is disabled.
  bool disableJmpi() const { return DisableJmpi; }

  /// * WaNoA32ByteScatteredStatelessMessages - true if there is no A32 byte
  /// scatter stateless message.
  bool WaNoA32ByteScatteredStatelessMessages() const {
    return HasWaNoA32ByteScatter;
  }

  /// * disableVectorDecomposition - true if vector decomposition is disabled.
  bool disableVectorDecomposition() const { return DisableVectorDecomposition; }

  /// * disableJumpTables - true if switch to jump tables lowering is disabled.
  bool disableJumpTables() const { return DisableJumpTables; }

  /// * has switchjmp instruction
  bool hasSwitchjmp() const { return HasSwitchjmp; }

  /// * has preemption
  bool hasPreemption() const { return HasPreemption; }

  /// * needsWANoMaskFusedEU() - true if we need to apply WA for NoMask ops
  bool needsWANoMaskFusedEU() const { return WaNoMaskFusedEU; }

  /// * hasFusedEU() - true if subtarget has fused EUs
  bool hasFusedEU() const { return HasFusedEU; }

  /// * has integer div/rem instruction
  bool hasIntDivRem32() const { return HasIntDivRem32; }

  /// * warnCallable() - true if compiler only generate warning for
  /// callable in the middle
  bool warnCallable() const { return WarnCallable; }

  /// * hasIndirectGRFCrossing - true if target supports an indirect region
  /// crossing one GRF boundary
  bool hasIndirectGRFCrossing() const { return HasIndirectGRFCrossing; }

  /// * hasIndirectByteGRFCrossing - true if target supports an indirect region
  /// crossing one GRF boundary with byte type
  bool hasIndirectByteGRFCrossing() const { return HasIndirectByteGRFCrossing; }

  /// * hasMultiIndirectByteRegioning - true if target supports an multi
  /// indirect regions with byte type
  bool hasMultiIndirectByteRegioning() const {
    return HasMultiIndirectByteRegioning;
  }

  /// * hasNBarrier - true if named barriers are supported
  bool hasNBarrier() const { return HasNamedBarriers; }

  /// * getMaxSlmSize - returns maximum allowed SLM size (in KB)
  unsigned getMaxSlmSize() const {
    // MaxSLMSize is stored as int; make the conversion to the unsigned
    // return type explicit instead of relying on an implicit one.
    return static_cast<unsigned>(MaxSLMSize);
  }

  /// * hasThreadPayloadInMemory - true if thread payload should be loaded
  /// from memory
  bool hasThreadPayloadInMemory() const { return HasThreadPayloadInMemory; }

  /// * hasSad2Support - returns true if sad2/sada2 are supported by target
  bool hasSad2Support() const { return HasSad2; }

  /// * hasBitRotate - true if 32-bit rol/ror instructions are supported
  bool hasBitRotate() const { return HasBitRotate; }

  /// * has64BitRotate - true if 64-bit rol/ror instructions are supported
  bool has64BitRotate() const { return Has64BitRotate; }

  /// * hasLocalIntegerCas64 - true if local integer compare exchange 64-bit
  /// is supported
  bool hasLocalIntegerCas64() const { return HasLocalIntegerCas64; }

  /// * hasGlobalAtomicAddF64 - true if global double precision atomic add/sub
  /// is supported
  bool hasGlobalAtomicAddF64() const { return HasGlobalAtomicAddF64; }

  /// * hasL1ReadOnlyCache - true if there is an L1 read-only cache
  bool hasL1ReadOnlyCache() const { return HasL1ReadOnlyCache; }

  /// * hasLocalMemFenceSupress - returns the local memory fence suppression
  /// flag
  bool hasLocalMemFenceSupress() const { return HasLocalMemFenceSupress; }

  /// * hasMultiTile - true if the subtarget has multi-tile
  bool hasMultiTile() const { return HasMultiTile; }

  /// * hasL3CacheCoherentCrossTiles - true if L3 is cache-coherent cross tiles
  bool hasL3CacheCoherentCrossTiles() const {
    return HasL3CacheCoherentCrossTiles;
  }

  /// * hasL3FlushOnGPUScopeInvalidate - true if there is an L3 flush on
  /// GPU-scope invalidate
  bool hasL3FlushOnGPUScopeInvalidate() const {
    return HasL3FlushOnGPUScopeInvalidate;
  }

  /// * hasSLMOWord - true if OWord SLM read/write messages are supported
  bool hasSLMOWord() const { return HasSLMOWord; }

  /// * hasMadSimd32 - true if SIMD32 MAD instruction is supported
  bool hasMadSimd32() const { return HasMadSimd32; }

  /// * hasLargeGRF - true if large GRF mode is supported
  bool hasLargeGRF() const { return HasLargeGRF; }

  /// * getsHWTIDFromPredef - some subtargets get HWTID from
  // predefined variable instead of sr0, returns *true* for such ones.
  bool getsHWTIDFromPredef() const { return GetsHWTIDFromPredef; }

  /// * hasSystolicDenormControl - true if there is denormal control for BF16
  /// and TF32 types on DPAS
  bool hasSystolicDenormControl() const { return HasSystolicDenormControl; }

  // Declared here, defined elsewhere; presumably the maximum number of
  // threads per sub-device, as the name says.
  uint32_t getMaxThreadsNumPerSubDevice() const;

  // Declared here, defined elsewhere. NOTE(review): by analogy with the
  // getters below these are presumably bit fields of the thread id that are
  // reserved - confirm against the definition.
  ArrayRef<std::pair<int, int>> getThreadIdReservedBits() const;

  /// bit fields for SliceID and SubsliceID (from lsb to msb).
  ArrayRef<std::pair<int, int>> getSubsliceIdBits() const;

  /// bit fields for EU ID (from lsb to msb).
  ArrayRef<std::pair<int, int>> getEUIdBits() const;

  /// bit fields for ThreadID (from lsb to msb).
  ArrayRef<std::pair<int, int>> getThreadIdBits() const;

  /// Number of cache levels; always 2.
  unsigned getNumCacheLevels() const { return 2; }

  // Address Register size in elements.
  unsigned getAddressRegisterElements() const {
    return AddressRegisterElements;
  }

  // Generic helper functions...
  const Triple &getTargetTriple() const { return TargetTriple; }

  // Declared here, defined elsewhere; returns a TARGET_PLATFORM value for
  // this subtarget.
  TARGET_PLATFORM getVisaPlatform() const;

  /// * stackSurface - return a surface that should be used for stack.
  PreDefined_Surface stackSurface() const { return StackSurf; }

  // Declared here, defined elsewhere. Judging by the name it reports whether
  // the intrinsic with the given ID is supported by this subtarget.
  bool isIntrinsicSupported(unsigned ID) const;
};
} // namespace llvm
#endif
|