1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148
|
#pragma once
#include <cstdint>
#include <memory>
#include <set>
#include <string>
// Skip the Kineto dependency on mobile unless it is explicitly requested.
// It is explicitly requested by defining EDGE_PROFILER_USE_KINETO:
// KinetoEdgeCPUProfiler uses KinetoProfiler for CPU event profiling, which
// depends on the CPU-only build of libkineto.
#if defined(USE_KINETO) && defined(C10_MOBILE) && \
!defined(EDGE_PROFILER_USE_KINETO)
#undef USE_KINETO
#endif
#include <ActivityType.h>
#include <torch/csrc/Export.h>
#include <torch/csrc/profiler/api.h>
#ifdef USE_KINETO
// Forward declarations so we don't have to include `libkineto.h` in a header.
// Only the three types aliased further down (as `activity_t`, `trace_t` and
// `interface_trace_t`) are declared; they stay incomplete in this header.
namespace libkineto {
class GenericTraceActivity;
struct CpuTraceBuffer;
class ActivityTraceInterface;
} // namespace libkineto
#endif
namespace torch {
namespace profiler {
// Compile-time flag: true exactly when USE_KINETO is still defined at this
// point in the header (it may have been #undef'd above for mobile builds).
constexpr bool kKinetoAvailable =
#ifdef USE_KINETO
    true;
#else
    false;
#endif
namespace impl::kineto {
// ----------------------------------------------------------------------------
// -- Interface (Does not require Kineto) -------------------------------------
// ----------------------------------------------------------------------------
// Plain aggregate holding a pair of 32-bit integer ids. It is the return type
// of `kineto_ids()` and a parameter of `TraceWrapper::addCPUActivity`.
// NOTE(review): the fields presumably carry Kineto's device id and resource
// id -- confirm against the definition of `kineto_ids()`.
struct DeviceAndResource {
  int32_t device;
  int32_t resource;
};
// Returns a DeviceAndResource by value; the definition lives outside this
// header.
// NOTE(review): presumably the ids describe the caller as Kineto sees it --
// confirm against the implementation.
const DeviceAndResource kineto_ids();
// Type aliases shared by the wrappers in this header.
//  - trace_t:           the buffer type owned by TraceWrapper.
//  - interface_trace_t: the trace type owned by ActivityTraceWrapper.
//  - activity_t:        the activity type handled through `activity_t*`.
#ifdef USE_KINETO
using trace_t = libkineto::CpuTraceBuffer;
using interface_trace_t = libkineto::ActivityTraceInterface;
using activity_t = libkineto::GenericTraceActivity;
#else
// Empty stand-ins so the wrappers still compile when Kineto is unavailable.
struct DummyTraceBuffer {};
struct DummyTraceInterface {};
using trace_t = DummyTraceBuffer;
// Each alias maps to its own placeholder. This alias used to point at
// DummyTraceBuffer, which left DummyTraceInterface unused and made the two
// aliases the same type in non-Kineto builds only.
using interface_trace_t = DummyTraceInterface;
// Only declared, never defined here: without Kineto it can be used solely
// through pointers or references.
struct activity_t;
#endif // USE_KINETO
// Takes an activity pointer plus a string key and a string value; declaration
// only, defined outside this header.
// NOTE(review): presumably attaches the key/value pair to `activity` as
// metadata -- confirm against the implementation, including whether a null
// `activity` is tolerated.
void addMetadata(
activity_t* activity,
const std::string& key,
const std::string& value);
// Owns a `trace_t` through a unique_ptr. With Kineto enabled `trace_t` is
// libkineto::CpuTraceBuffer; otherwise it is an empty placeholder type.
struct TraceWrapper {
  TraceWrapper(const int64_t start_time, const std::string& name);

  explicit operator bool() const;

  // Hands out a mutable reference to the owning pointer itself.
  std::unique_ptr<trace_t>& get() {
    return cpu_trace_;
  }

  // Callers are expected to already hold a mutex when they invoke
  // `addCPUActivity`.
  activity_t* addCPUActivity(
      const std::string& name,
      const libkineto::ActivityType type,
      const DeviceAndResource device_and_resource,
      const uint64_t correlation_id,
      const int64_t start_time,
      const int64_t end_time);

  void transferCpuTrace(int64_t end_time);

 private:
  std::unique_ptr<trace_t> cpu_trace_;
};
// Owns an `interface_trace_t` through a unique_ptr. With Kineto enabled
// `interface_trace_t` is libkineto::ActivityTraceInterface.
struct ActivityTraceWrapper {
  // Takes ownership of `trace`.
  explicit ActivityTraceWrapper(std::unique_ptr<interface_trace_t>&& trace);

  // A default-constructed wrapper holds a null pointer.
  ActivityTraceWrapper() = default;

  // NOTE(review): presumably true when a trace is held -- confirm against the
  // out-of-line definition.
  explicit operator bool() const;

  // NOTE(review): presumably writes the trace to `path`; the `saved_` flag
  // below suggests it is meant to run at most once -- confirm.
  void save(const std::string& path);

  // Read-only view of the owning pointer. Declared const so it can also be
  // called on a const wrapper; it never modifies the object.
  const std::unique_ptr<interface_trace_t>& get() const {
    return trace_;
  }

 private:
  std::unique_ptr<interface_trace_t> trace_;
#ifdef USE_KINETO
  bool saved_ = false; // Kineto's save is destructive
#endif
};
// Ordered set of profiler activity types; this is the type of the
// `activities` parameter of `prepareTrace`.
using ActivitySet = std::set<torch::autograd::profiler::ActivityType>;
// Declaration only; defined outside this header.
// Takes a `cpuOnly` flag, the set of `activities`, an ExperimentalConfig and
// an optional `trace_id` that defaults to the empty string.
// NOTE(review): presumably this must be called before `startTrace()` --
// confirm against the implementation.
void prepareTrace(
const bool cpuOnly,
const ActivitySet& activities,
const torch::profiler::impl::ExperimentalConfig& config,
const std::string& trace_id = "");
// NOTE(review): presumably switches collection on or off while a trace is in
// progress -- confirm against the implementation.
void toggleCollectionDynamic(const bool enable);
// Trace start/stop entry points; `stopTrace` returns an ActivityTraceWrapper
// by value.
// NOTE(review): presumably the returned wrapper owns the collected trace --
// confirm against the implementation.
void startTrace();
ActivityTraceWrapper stopTrace();
// Push/pop pairs for correlation ids: each push takes a 64-bit id, each pop
// takes no argument. There is one pair for plain ids and one for "user" ids.
// NOTE(review): presumably pushes and pops must be balanced by the caller --
// confirm against the implementation.
void pushCorrelationId(uint64_t correlation_id);
void pushUserCorrelationId(uint64_t correlation_id);
void popCorrelationId();
void popUserCorrelationId();
// NOTE(review): presumably registers the calling thread with Kineto --
// confirm against the implementation.
void recordThreadInfo();
// Returns a bool and takes no arguments.
// NOTE(review): presumably reports whether a collectives profiler is
// registered -- confirm against the implementation.
bool collectivesProfilerExists();
// Declaration only; takes four strings: the violated `assertion`, an `error`
// description, a `profile_id` and a `group_profile_id`.
// NOTE(review): presumably forwards these to Kineto's logging -- confirm
// against the implementation, including what happens without Kineto.
void logInvariantViolation(
const std::string& assertion,
const std::string& error,
const std::string& profile_id,
const std::string& group_profile_id);
} // namespace impl::kineto
} // namespace profiler
namespace autograd::profiler {
// Maps a libkineto activity type to a c10 device type. Unlike the two
// functions below it is not marked TORCH_API.
c10::DeviceType deviceTypeFromActivity(libkineto::ActivityType activity_type);
// Exported (TORCH_API); takes a string key and a string value.
// NOTE(review): presumably `value` is expected to be a JSON fragment, going
// by the function name -- confirm against the implementation.
TORCH_API void addMetadataJson(
const std::string& key,
const std::string& value);
// Exported (TORCH_API); takes no arguments.
// NOTE(review): presumably signals a profiler step boundary -- confirm
// against the implementation.
TORCH_API void profilerStep();
} // namespace autograd::profiler
} // namespace torch
|