1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208
|
#pragma once
#include <cstddef>
#include <cstdint>
#include <list>
#include <memory>
#include <optional>
#include <string>
#include <tuple>
#include <unordered_map>
#include <utility>
#include <variant>
#include <vector>

#include <ATen/record_function.h>
#include <c10/macros/Macros.h>
#include <c10/util/hash.h>
#include <torch/csrc/Export.h>
#include <torch/csrc/jit/frontend/source_range.h>
// SOFT_ASSERT(cond, ...): an assertion that evaluates to a `bool` expression
// and only raises when the soft-assert policy asks for it.
//
// The macro expands to an immediately-invoked lambda (capturing by reference)
// that:
//   * returns `true` when `cond` holds;
//   * otherwise reports the failure through
//     `torch::profiler::impl::logSoftAssert` (function name, file, line, the
//     stringified condition and the `::c10::str`-formatted message), then
//     either goes through `TORCH_INTERNAL_ASSERT` when `softAssertRaises()` is
//     true or emits `TORCH_WARN_ONCE` with the message, and finally, if
//     control reaches that point, returns `false`.
//
// Caveats visible in the expansion:
//   * `cond` is evaluated a second time inside `TORCH_INTERNAL_ASSERT` on the
//     raising path, so it should be free of side effects.
//   * `__func__` is expanded inside the lambda body.
//     NOTE(review): it therefore names the lambda's call operator rather than
//     the function containing the macro use -- confirm this is the value that
//     is meant to be logged.
//
// TODO: replace with pytorch/rfcs#43 when it is ready.
#define SOFT_ASSERT(cond, ...) \
[&]() -> bool { \
if (C10_UNLIKELY(!(cond))) { \
torch::profiler::impl::logSoftAssert( \
__func__, \
__FILE__, \
static_cast<uint32_t>(__LINE__), \
#cond, \
::c10::str(__VA_ARGS__)); \
if (torch::profiler::impl::softAssertRaises()) { \
TORCH_INTERNAL_ASSERT(cond, __VA_ARGS__); \
} else { \
TORCH_WARN_ONCE(__VA_ARGS__); \
} \
return false; \
} \
return true; \
}()
namespace torch::profiler::impl {
// Policy query used by the SOFT_ASSERT macro: when this returns true a failed
// soft assertion goes through TORCH_INTERNAL_ASSERT, otherwise it only warns.
TORCH_API bool softAssertRaises();
// Setter counterpart of softAssertRaises().
// NOTE(review): presumably passing std::nullopt clears an explicit override
// and restores the default policy -- confirm against the definition.
TORCH_API void setSoftAssertRaises(std::optional<bool> value);
// Reports a failed soft assertion; called by the SOFT_ASSERT macro.
//   func - name of the function the failure is attributed to (`__func__`)
//   file - source file (`__FILE__`)
//   line - source line (`__LINE__`)
//   cond - the stringified condition that failed
//   args - the formatted message, as a C string
// Where the report ends up is decided by the definition, which is not part of
// this header.
TORCH_API void logSoftAssert(
const char* func,
const char* file,
uint32_t line,
const char* cond,
const char* args);
// Overload of logSoftAssert taking a `::c10::detail::CompileTimeEmptyString`
// message and forwarding it to the `const char*` overload.
// NOTE(review): presumably this is the type `::c10::str()` yields when
// SOFT_ASSERT is used without a message -- confirm in c10.
//   func, file, line, cond - forwarded unchanged
//   args                   - converted to `const char*` before forwarding
TORCH_API inline void logSoftAssert(
    const char* func,
    const char* file,
    uint32_t line,
    const char* cond,
    ::c10::detail::CompileTimeEmptyString args) {
  // A named cast replaces the former C-style cast: only the conversion to
  // `const char*` is wanted, and static_cast cannot silently fall back to a
  // reinterpret/const cast if the argument type ever changes.
  logSoftAssert(func, file, line, cond, static_cast<const char*>(args));
}
// Overload of logSoftAssert that takes the formatted message as a
// `std::string`; the remaining parameters (function name, file, line and
// stringified condition) have the same meaning as in the other overloads.
TORCH_API void logSoftAssert(
const char* func,
const char* file,
uint32_t line,
const char* cond,
const std::string& args);
// Shape payload holding either one vector of int64_t or a vector of such
// vectors.
// NOTE(review): presumably the first alternative is a single tensor's
// dimensions and the second a tensor list -- confirm against the producers of
// this type.
using shape =
std::variant<std::vector<int64_t>, std::vector<std::vector<int64_t>>>;
// NOTE(review): presumably the cap on how many tensor-list entries are
// rendered when shapes are printed; its use is not visible in this header.
constexpr int TENSOR_LIST_DISPLAY_LENGTH_LIMIT = 30;
// Builds a string from an op name, its sequence number and its input shapes,
// optionally including the op's handle and the (handle, int) pairs describing
// its inputs. `op_id` defaults to 0 and `input_op_ids` to an empty list.
// NOTE(review): the name suggests the result is used as an NVTX range label,
// and the int in each pair is presumably an output index -- confirm against
// the definition. Unlike most neighbouring declarations this one is not
// marked TORCH_API.
std::string getNvtxStr(
const char* name,
int64_t sequence_nr,
const std::vector<std::vector<int64_t>>& shapes,
at::RecordFunctionHandle op_id = 0,
const std::list<std::pair<at::RecordFunctionHandle, int>>& input_op_ids =
{});
// One source location: file name, line number and function name.
// This is the element type returned by prepareCallstack() and accepted by
// callstackStr().
struct TORCH_API FileLineFunc {
std::string filename;
// No default member initializer: a default-initialized FileLineFunc leaves
// this value indeterminate.
size_t line;
std::string funcname;
};
// Options passed to saveNcclMeta().
//
// The struct only stores the four flags; how each one is interpreted is up to
// saveNcclMeta()'s definition. A default-constructed config has `truncate`
// and `introspectMetadata` set and both `introspectInputs` and
// `introspectOutputs` cleared.
struct TORCH_API SaveNcclMetaConfig {
  bool truncate = true;
  bool introspectMetadata = true;
  bool introspectInputs = false;
  bool introspectOutputs = false;

  // Default configuration: takes the member initializers above.
  SaveNcclMetaConfig() = default;

  // Fully explicit configuration: every flag is taken from the matching
  // argument.
  SaveNcclMetaConfig(
      bool truncate,
      bool introspectMetadata,
      bool introspectInputs,
      bool introspectOutputs)
      : truncate(truncate),
        introspectMetadata(introspectMetadata),
        introspectInputs(introspectInputs),
        introspectOutputs(introspectOutputs) {}
};
// Converts a JIT call stack (`jit::StackEntry` items) into FileLineFunc
// records.
// NOTE(review): whether every entry yields a record, or some are filtered
// out, is decided by the definition -- confirm there.
TORCH_API std::vector<FileLineFunc> prepareCallstack(
const std::vector<jit::StackEntry>& cs);
// Renders FileLineFunc records as strings.
// NOTE(review): presumably one string per record -- confirm the exact format
// in the definition.
TORCH_API std::vector<std::string> callstackStr(
const std::vector<FileLineFunc>& cs);
// Produces a single string from `stacks` and the delimiter `delim`.
// NOTE(review): presumably a plain join of the entries -- confirm.
TORCH_API std::string stacksToStr(
const std::vector<std::string>& stacks,
const char* delim);
// Returns a vector of int64_t vectors derived from the inputs of `fn`.
// `flatten_list_enabled` defaults to false.
// NOTE(review): presumably each inner vector is one input's sizes and the
// flag controls whether tensor-list inputs are expanded -- confirm.
TORCH_API std::vector<std::vector<int64_t>> inputSizes(
const at::RecordFunction& fn,
const bool flatten_list_enabled = false);
// String form of a list of `shape` variants.
TORCH_API std::string variantShapesToStr(const std::vector<shape>& shapes);
// String form of a list of int64_t vectors.
TORCH_API std::string shapesToStr(
const std::vector<std::vector<int64_t>>& shapes);
// String form of a list of strings (the parameter is named `types`).
TORCH_API std::string strListToStr(const std::vector<std::string>& types);
// String form of a list of (RecordFunctionHandle, int) pairs; this is the same
// list type getNvtxStr() accepts as `input_op_ids`.
TORCH_API std::string inputOpIdsToStr(
const std::list<std::pair<at::RecordFunctionHandle, int>>& input_op_ids);
// String form of a single IValue.
// NOTE(review): `isString` presumably selects string-specific formatting such
// as quoting -- confirm in the definition.
TORCH_API std::string ivalueToStr(const c10::IValue& val, bool isString);
// String form of a list of IValues.
TORCH_API std::string ivalueListToStr(const std::vector<c10::IValue>& list);
// Returns one string per entry describing the inputs of `fn`.
// NOTE(review): presumably dtype/type names, per the function name -- confirm.
TORCH_API std::vector<std::string> inputTypes(const at::RecordFunction& fn);
// Collects a name -> IValue map from `fn`. The returned map has the same type
// as the `extra_args` parameter of computeFlops().
// NOTE(review): which arguments are captured, and for which ops, is decided by
// the definition.
std::unordered_map<std::string, c10::IValue> TORCH_API
saveExtraArgs(const at::RecordFunction& fn);
// Collects a string -> string metadata map from `fn`, controlled by `config`
// (a default-constructed SaveNcclMetaConfig when omitted).
// NOTE(review): presumably only meaningful for NCCL collective ops, per the
// name -- confirm.
std::unordered_map<std::string, std::string> TORCH_API saveNcclMeta(
const at::RecordFunction& fn,
const SaveNcclMetaConfig& config = SaveNcclMetaConfig());
// The four helpers below are declared without TORCH_API.
//
// Returns an int computed from tensor `t`.
// NOTE(review): presumably a compact hint derived from the tensor's start
// address -- confirm in the definition.
int getTensorStartHint(const at::Tensor& t);
// Predicate over `fn`.
// NOTE(review): presumably true when the outputs of `fn` are suitable for
// logging -- confirm.
bool checkFunctionOutputsForLogging(const at::RecordFunction& fn);
// Predicate over `fn`; the input-side counterpart of the function above.
// NOTE(review): confirm the exact criteria in the definition.
bool checkFunctionInputsForLogging(const at::RecordFunction& fn);
// Returns a flag paired with either a single int or a vector of ints computed
// from `val`.
// NOTE(review): presumably the flag reports whether `val` held tensor data and
// the variant distinguishes a single tensor from a tensor list -- confirm.
std::pair<bool, std::variant<int, std::vector<int>>> findStartAddrForTensors(
const c10::IValue& val);
// Computes a 64-bit unsigned count for the operator named `op_name` from its
// `extra_args` map (same map type as returned by saveExtraArgs()).
// NOTE(review): presumably an estimated floating-point operation count, with
// some fallback value for unsupported ops -- confirm in the definition.
uint64_t TORCH_API computeFlops(
const std::string& op_name,
const std::unordered_map<std::string, c10::IValue>& extra_args);
// String form of a single int64_t vector. Declared without TORCH_API.
std::string shapeToStr(const std::vector<int64_t>& shape);
template <typename T>
class TORCH_API GlobalStateManager {
public:
static GlobalStateManager& singleton() {
/* library-local */ static GlobalStateManager singleton_;
return singleton_;
}
static void push(std::shared_ptr<T>&& state) {
if (singleton().state_) {
LOG(WARNING) << "GlobalStatePtr already exists!";
} else {
singleton().state_ = std::move(state);
}
}
static auto* get() {
return singleton().state_.get();
}
static std::shared_ptr<T> pop() {
auto out = singleton().state_;
singleton().state_.reset();
return out;
}
private:
GlobalStateManager() = default;
std::shared_ptr<T> state_;
};
// Hash functor built on top of c10::get_hash.
//   * std::pair  - each member is hashed through this functor (so nested
//                  pairs recurse) and the two results are combined with
//                  c10::get_hash.
//   * std::tuple - forwarded to c10::get_hash as a whole.
//   * any other T - forwarded to c10::get_hash.
//
// The call operators are const-qualified: hashing does not modify the
// functor, and containers may invoke their hasher through a const object.
// Non-const callers are unaffected.
struct HashCombine {
  template <typename T0, typename T1>
  size_t operator()(const std::pair<T0, T1>& i) const {
    return c10::get_hash((*this)(i.first), (*this)(i.second));
  }

  template <typename... Args>
  size_t operator()(const std::tuple<Args...>& i) const {
    return c10::get_hash(i);
  }

  template <typename T>
  size_t operator()(const T& i) const {
    return c10::get_hash(i);
  }
};
// String constants that are only available when USE_DISTRIBUTED is defined.
// NOTE(review): presumably these are the keys of the metadata map returned by
// saveNcclMeta() -- confirm against its definition. The literal text is
// runtime data and must not be changed casually.
#ifdef USE_DISTRIBUTED
constexpr auto kCommsName = "Collective name";
constexpr auto kDtype = "dtype";
constexpr auto kInMsgNelems = "In msg nelems";
constexpr auto kOutMsgNelems = "Out msg nelems";
constexpr auto kInSplit = "In split size";
constexpr auto kOutSplit = "Out split size";
constexpr auto kGlobalRankStart = "Global rank start";
constexpr auto kGlobalRankStride = "Global rank stride";
constexpr auto kGroupSize = "Group size";
constexpr auto kProcessGroupName = "Process Group Name";
constexpr auto kProcessGroupDesc = "Process Group Description";
constexpr auto kGroupRanks = "Process Group Ranks";
constexpr auto kRank = "Rank";
constexpr auto kP2pSrc = "Src Rank";
constexpr auto kP2pDst = "Dst Rank";
constexpr auto kInTensorsStart = "Input Tensors start";
constexpr auto kOutTensorsStart = "Output Tensors start";
#endif // USE_DISTRIBUTED
} // namespace torch::profiler::impl
|