File: NVPTXSubtarget.h

package info (click to toggle)
llvm-toolchain-20 1%3A20.1.8-1
  • links: PTS, VCS
  • area: main
  • in suites: experimental
  • size: 2,111,696 kB
  • sloc: cpp: 7,438,781; ansic: 1,393,871; asm: 1,012,926; python: 241,771; f90: 86,635; objc: 75,411; lisp: 42,144; pascal: 17,286; sh: 8,596; ml: 5,082; perl: 4,730; makefile: 3,591; awk: 3,523; javascript: 2,251; xml: 892; fortran: 672
file content (144 lines) | stat: -rw-r--r-- 6,087 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
//=====-- NVPTXSubtarget.h - Define Subtarget for the NVPTX ---*- C++ -*--====//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file declares the NVPTX-specific subclass of TargetSubtargetInfo.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIB_TARGET_NVPTX_NVPTXSUBTARGET_H
#define LLVM_LIB_TARGET_NVPTX_NVPTXSUBTARGET_H

#include "NVPTX.h"
#include "NVPTXFrameLowering.h"
#include "NVPTXISelLowering.h"
#include "NVPTXInstrInfo.h"
#include "NVPTXRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/DataLayout.h"
#include <string>

#define GET_SUBTARGETINFO_HEADER
#include "NVPTXGenSubtargetInfo.inc"

namespace llvm {

/// NVPTX-specific subtarget description.
///
/// Stores the target name together with the PTX ISA and SM versions, owns the
/// instruction-info, target-lowering and frame-lowering objects, and exposes
/// feature predicates that are computed from the stored versions.
class NVPTXSubtarget : public NVPTXGenSubtargetInfo {
  virtual void anchor();
  std::string TargetName;

  // NOTE(review): the constructor is defined out of line. Data members are
  // initialized in declaration order, so check the constructor before
  // reordering any of the members below.

  // PTX ISA version x.y, encoded as 10*x+y (e.g. 3.1 is stored as 31).
  unsigned PTXVersion;

  // Full SM version x.y, encoded as 100*x+10*y+feature (e.g. 3.1 is stored as
  // 310, and sm_90a is stored as 901).
  unsigned int FullSmVersion;

  // SM version x.y, encoded as 10*x+y (e.g. 3.1 is stored as 31). Derived from
  // FullSmVersion.
  unsigned int SmVersion;

  NVPTXInstrInfo InstrInfo;
  NVPTXTargetLowering TLInfo;
  std::unique_ptr<const SelectionDAGTargetInfo> TSInfo;

  // NVPTX has no call stack frame, but an NVPTX-specific FrameLowering class
  // is still required because TargetFrameLowering is abstract.
  NVPTXFrameLowering FrameLowering;

  // True when the stored SM version (10*x+y encoding) is at least MinSm.
  bool isSmAtLeast(unsigned MinSm) const { return SmVersion >= MinSm; }

  // True when the stored PTX version (10*x+y encoding) is at least MinPTX.
  bool isPTXAtLeast(unsigned MinPTX) const { return PTXVersion >= MinPTX; }

  // True when both the SM and the PTX version meet the given minimums.
  bool supports(unsigned MinSm, unsigned MinPTX) const {
    return isSmAtLeast(MinSm) && isPTXAtLeast(MinPTX);
  }

public:
  /// Initializes the data members to match those of the specified module.
  NVPTXSubtarget(const Triple &TT, const std::string &CPU,
                 const std::string &FS, const NVPTXTargetMachine &TM);

  ~NVPTXSubtarget() override;

  /// Returns the frame lowering object owned by this subtarget.
  const TargetFrameLowering *getFrameLowering() const override {
    return &FrameLowering;
  }

  /// Returns the instruction info object owned by this subtarget.
  const NVPTXInstrInfo *getInstrInfo() const override { return &InstrInfo; }

  /// Returns the register info, which is obtained from the instruction info.
  const NVPTXRegisterInfo *getRegisterInfo() const override {
    return &InstrInfo.getRegisterInfo();
  }

  /// Returns the target lowering object owned by this subtarget.
  const NVPTXTargetLowering *getTargetLowering() const override {
    return &TLInfo;
  }

  const SelectionDAGTargetInfo *getSelectionDAGInfo() const override;

  // Feature predicates. Each one compares the stored SM and/or PTX version
  // against a fixed minimum.
  bool hasAtomAddF64() const { return isSmAtLeast(60); }
  bool hasAtomScope() const { return isSmAtLeast(60); }
  bool hasAtomBitwise64() const { return isSmAtLeast(32); }
  bool hasAtomMinMax64() const { return isSmAtLeast(32); }
  bool hasAtomCas16() const { return supports(70, 63); }
  bool hasClusters() const { return supports(90, 78); }
  bool hasLDG() const { return isSmAtLeast(32); }
  bool hasHWROT32() const { return isSmAtLeast(32); }
  bool hasFP16Math() const { return isSmAtLeast(53); }
  bool hasBF16Math() const { return isSmAtLeast(80); }
  bool allowFP16Math() const;
  bool hasMaskOperator() const { return isPTXAtLeast(71); }
  bool hasNoReturn() const { return supports(30, 64); }

  // Do the SM and PTX versions support memory orderings (weak and atomic:
  // relaxed, acquire, release, acq_rel, sc)?
  bool hasMemoryOrdering() const { return supports(70, 60); }

  // Do the SM and PTX versions support atomic relaxed MMIO operations?
  bool hasRelaxedMMIO() const { return supports(70, 82); }

  bool hasDotInstructions() const { return supports(61, 50); }

  // Prior to CUDA 12.3, ptxas did not recognize that the trap instruction
  // terminates a basic block. Instead, it assumed that control flow continued
  // to the next instruction, which could be in the block that is lexically
  // below it. This led to phantom CFG edges being created within ptxas. The
  // issue was fixed in CUDA 12.3, so for PTX ISA versions 8.3 and later we can
  // confidently say that the bug is not present.
  bool hasPTXASUnreachableBug() const { return !isPTXAtLeast(83); }

  bool hasCvtaParam() const { return supports(70, 77); }

  /// Returns the full SM version (100*x+10*y+feature encoding).
  unsigned int getFullSmVersion() const { return FullSmVersion; }

  /// Returns the SM version (10*x+y encoding), i.e. the full SM version with
  /// its last decimal digit dropped.
  unsigned int getSmVersion() const { return FullSmVersion / 10; }

  // GPUs with an "a" suffix include architecture-accelerated features that
  // are supported on the specified architecture only, hence such targets do
  // not follow the onion layer model. hasAAFeatures() allows distinguishing
  // such GPU variants from the base GPU architecture. The last decimal digit
  // of the full SM version is:
  // - 0 for the base GPU model,
  // - non-zero for a particular architecture-accelerated variant.
  bool hasAAFeatures() const { return (FullSmVersion % 10) != 0; }

  // If the user did not provide a target, we default to the `sm_30` target.
  std::string getTargetName() const {
    if (hasTargetName())
      return TargetName;
    return "sm_30";
  }

  /// Returns true when a non-empty target name is stored.
  bool hasTargetName() const { return !TargetName.empty(); }

  bool hasNativeBF16Support(int Opcode) const;

  // Get the maximum value of the required alignments among the supported data
  // types. From the PTX ISA doc, section 8.2.3:
  //  The memory consistency model relates operations executed on memory
  //  locations with scalar data-types, which have a maximum size and alignment
  //  of 64 bits. Memory operations with a vector data-type are modelled as a
  //  set of equivalent memory operations with a scalar data-type, executed in
  //  an unspecified order on the elements in the vector.
  unsigned getMaxRequiredAlignment() const { return 8; }

  // Emulated loops with 32-bit/64-bit CAS generate better SASS than 16-bit
  // CAS.
  unsigned getMinCmpXchgSizeInBits() const { return 32; }

  /// Returns the PTX ISA version (10*x+y encoding).
  unsigned getPTXVersion() const { return PTXVersion; }

  NVPTXSubtarget &initializeSubtargetDependencies(StringRef CPU, StringRef FS);
  void ParseSubtargetFeatures(StringRef CPU, StringRef TuneCPU, StringRef FS);

  void failIfClustersUnsupported(std::string const &FailureMessage) const;
};

} // End llvm namespace

#endif