1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73
|
#ifdef USE_KINETO
#include <libkineto.h>
#include <torch/csrc/autograd/profiler_kineto.h>
// On-demand tracing is not supported on Apple platforms.
#ifdef __APPLE__
#define ENABLE_GLOBAL_OBSERVER (0)
#else
#define ENABLE_GLOBAL_OBSERVER (1)
#endif
namespace torch {
namespace profiler {
namespace impl {
namespace {
using namespace torch::autograd::profiler;
class LibKinetoClient : public libkineto::ClientInterface {
public:
void init() override {}
void warmup(bool setupOpInputsCollection) override {
reportInputShapes_ = setupOpInputsCollection;
}
void start() override {
ProfilerConfig cfg{
ProfilerState::KINETO_ONDEMAND,
/*report_input_shapes=*/reportInputShapes_,
/*profile_memory=*/false,
/*with_stack=*/withStack_,
/*with_flops=*/false,
/*with_modules=*/false};
std::set<ActivityType> activities{ActivityType::CPU};
std::unordered_set<at::RecordScope> scopes;
scopes.insert(at::RecordScope::FUNCTION);
scopes.insert(at::RecordScope::USER_SCOPE);
scopes.insert(at::RecordScope::BACKWARD_FUNCTION);
enableProfiler(cfg, activities, scopes);
}
void stop() override {
(void)disableProfiler();
}
// @lint-ignore CLANGTIDY cppcoreguidelines-explicit-virtual-functions
void set_withstack(bool withStack) {
withStack_ = withStack;
}
private:
bool reportInputShapes_{true};
bool withStack_{false};
};
} // namespace
} // namespace impl
} // namespace profiler
#if ENABLE_GLOBAL_OBSERVER
struct RegisterLibKinetoClient {
RegisterLibKinetoClient() {
static profiler::impl::LibKinetoClient client;
libkineto::api().registerClient(&client);
}
} register_libkineto_client;
#endif
} // namespace torch
#endif // USE_KINETO
|