1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129
|
#pragma once
#include <ATen/record_function.h>
#include <torch/csrc/Export.h>
namespace torch {
namespace profiler {
namespace impl {
// ----------------------------------------------------------------------------
// -- Profiler Config ---------------------------------------------------------
// ----------------------------------------------------------------------------
// Activity categories used by the profiler.
// Enumerators are numbered sequentially starting at CPU = 0, so the trailing
// NUM_KINETO_ACTIVITIES sentinel equals the number of real activity types.
enum class C10_API_ENUM ActivityType {
CPU = 0,
CUDA, // CUDA kernels, runtime
// Count sentinel rather than a real activity; add new enumerators above it.
NUM_KINETO_ACTIVITIES, // must be the last one
};
// Operating mode of the profiler; this is the type of ProfilerConfig::state.
// Enumerators are numbered sequentially starting at Disabled = 0, so the
// trailing NUM_PROFILER_STATES sentinel equals the number of real states.
enum class C10_API_ENUM ProfilerState {
Disabled = 0,
CPU, // CPU-only profiling
CUDA, // CPU + CUDA events
NVTX, // only emit NVTX markers
ITT, // only emit ITT markers
KINETO, // use libkineto
KINETO_GPU_FALLBACK, // use CUDA events when CUPTI is not available
KINETO_ONDEMAND, // run the profiler in on-demand mode
// Count sentinel rather than a real state; add new enumerators above it.
NUM_PROFILER_STATES, // must be the last one
};
// Identifies which profiler implementation is in use. This is the return type
// of ProfilerStateBase::profilerType() and of the free function
// profilerType() declared later in this header.
enum class C10_API_ENUM ActiveProfilerType {
// NOTE(review): presumably reported when no profiler is active -- confirm
// against the definition of profilerType().
NONE = 0,
LEGACY,
KINETO,
NVTX,
ITT
};
// Experimental profiler settings. An instance is embedded in ProfilerConfig
// as `experimental_config`.
//
// No destructor is declared on purpose (Rule of Zero). A user-declared
// destructor -- even `= default` -- suppresses the implicitly generated move
// constructor and move assignment operator, which would turn every "move" of
// this struct into a deep copy of `profiler_metrics`. The implicit destructor
// is equivalent to the defaulted one.
struct TORCH_API ExperimentalConfig {
  // Every argument is optional. Defaults: no metrics, per-kernel measurement
  // off, verbose on. The constructor is intentionally left non-explicit so
  // existing call sites that rely on implicit construction keep compiling.
  ExperimentalConfig(
      std::vector<std::string> profiler_metrics = {},
      bool profiler_measure_per_kernel = false,
      bool verbose = true);

  // Defined out of line.
  // NOTE(review): presumably reports whether any experimental option is in
  // use -- confirm against the definition.
  explicit operator bool() const;

  // Names of the profiler metrics requested; empty by default.
  std::vector<std::string> profiler_metrics;
  // Defaults to false. NOTE(review): presumably selects per-kernel rather than
  // aggregate measurement of `profiler_metrics` -- confirm with the consumer.
  bool profiler_measure_per_kernel;
  // Defaults to true.
  bool verbose;
};
// Full configuration for a profiling session: the operating mode plus a set
// of feature toggles and the experimental options.
//
// No destructor is declared on purpose (Rule of Zero). A user-declared
// destructor -- even `= default` -- suppresses the implicitly generated move
// constructor and move assignment operator, so the struct (including the
// embedded `experimental_config`) could only ever be copied. The implicit
// destructor is equivalent to the defaulted one.
struct TORCH_API ProfilerConfig {
  // `state` is required; every feature toggle defaults to false and
  // `experimental_config` defaults to a default-constructed
  // ExperimentalConfig. Intentionally non-explicit to stay source-compatible
  // with existing callers.
  ProfilerConfig(
      ProfilerState state,
      bool report_input_shapes = false,
      bool profile_memory = false,
      bool with_stack = false,
      bool with_flops = false,
      bool with_modules = false,
      ExperimentalConfig experimental_config = ExperimentalConfig());

  // Both predicates are defined out of line.
  // NOTE(review): presumably `disabled()` tests `state` against
  // ProfilerState::Disabled and `global()` tests for an on-demand/global
  // mode -- confirm against the definitions.
  bool disabled() const;
  bool global() const;

  // Member order is part of the layout and of the constructor's
  // initialization order; do not reorder.
  ProfilerState state;
  ExperimentalConfig experimental_config;
  bool report_input_shapes;
  bool profile_memory; // read by ProfilerStateBase::memoryProfilingEnabled()
  bool with_stack;
  bool with_flops;
  bool with_modules;

  // For serialization
  at::IValue toIValue() const;
  static ProfilerConfig fromIValue(const at::IValue& profilerConfigIValue);
};
// ----------------------------------------------------------------------------
// -- Profiler base class -----------------------------------------------------
// ----------------------------------------------------------------------------
// Abstract base for profiler state objects. It stores the ProfilerConfig the
// profiler was created with plus a callback handle, and exposes static
// accessors for looking up, installing and removing the current state.
// Concrete profilers must implement profilerType().
struct TORCH_API ProfilerStateBase : public c10::MemoryReportingInfoBase {
  explicit ProfilerStateBase(const ProfilerConfig& config);
  ~ProfilerStateBase() override;

  // Looks up the state selected by `global` (defined out of line).
  static ProfilerStateBase* get(bool global);

  // Convenience lookup: the `global == true` state wins when it exists,
  // otherwise the result of the `global == false` lookup is returned (which
  // may itself be null).
  static ProfilerStateBase* get() {
    if (auto* global_state = get(/*global=*/true)) {
      return global_state;
    }
    return get(/*global=*/false);
  }

  // Installs `state` (defined out of line).
  static void push(std::shared_ptr<ProfilerStateBase>&& state);

  // Removes and returns the state selected by `global` (defined out of line).
  static std::shared_ptr<ProfilerStateBase> pop(bool global);

  // Convenience removal mirroring get(): try `global == true` first and only
  // fall back to `global == false` when that yields an empty pointer.
  static std::shared_ptr<ProfilerStateBase> pop() {
    if (auto global_state = pop(/*global=*/true)) {
      return global_state;
    }
    return pop(/*global=*/false);
  }

  // Read-only view of the configuration held by this state.
  const ProfilerConfig& config() const {
    return config_;
  }

  void setCallbackHandle(at::CallbackHandle handle);
  void removeCallback();

  // c10::MemoryReportingInfoBase override: simply forwards the
  // `profile_memory` flag of the stored config.
  bool memoryProfilingEnabled() const override {
    return config_.profile_memory;
  }

  // Implemented by each concrete profiler to report its kind.
  virtual ActiveProfilerType profilerType() = 0;

 protected:
  // NOLINTNEXTLINE(cppcoreguidelines-non-private-member-variables-in-classes)
  std::mutex state_mutex_;
  // NOLINTNEXTLINE(cppcoreguidelines-non-private-member-variables-in-classes)
  ProfilerConfig config_ = ProfilerConfig(ProfilerState::Disabled);
  // NOLINTNEXTLINE(cppcoreguidelines-non-private-member-variables-in-classes)
  at::CallbackHandle handle_ = 0;
};
// Note: The following are only for the active *thread local* profiler.
// All three are declared here and defined out of line.

// Reports whether profiling is enabled.
TORCH_API bool profilerEnabled();
// Reports which kind of profiler is active, as an ActiveProfilerType.
TORCH_API ActiveProfilerType profilerType();
// Returns the profiler's configuration by value.
TORCH_API ProfilerConfig getProfilerConfig();
} // namespace impl
} // namespace profiler
} // namespace torch
|