1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101
|
#pragma once
#include <array>
#include <cstdint>
#include <memory>
#include <stack>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>
#include <torch/csrc/profiler/events.h>
#include <c10/util/Exception.h>
namespace torch::profiler::impl::linux_perf {
/*
 * Upper bound on the number of events that can be tracked at once.
 *
 * The limit comes from the hardware restriction on CPU performance counters
 * combined with the fact that time multiplexing is not supported yet.
 * Time multiplexing would mean either scaling the counter values in
 * proportion to the enabled and running time, or running the workload
 * multiple times.
 */
constexpr uint8_t MAX_EVENTS{4};
/*
 * Plain record holding one counter reading: the event value together with the
 * enabled / running times.
 *
 * Every field defaults to zero so that a default-constructed PerfCounter never
 * exposes indeterminate values. The struct remains an aggregate, so
 * `PerfCounter{v, enabled, running}` and `PerfCounter{}` keep working.
 *
 * NOTE(review): the field order presumably mirrors the layout the kernel
 * writes for a perf read with the TIME_ENABLED / TIME_RUNNING formats —
 * confirm against the reader before reordering or adding members.
 */
struct PerfCounter {
  uint64_t value = 0; /* The value of the event */
  uint64_t time_enabled = 0; /* for TIME_ENABLED */
  uint64_t time_running = 0; /* for TIME_RUNNING */
};
/*
 * Basic perf event handler for Android and Linux.
 *
 * Holds the name of one event and the file descriptor associated with it
 * (fd_ is -1 while no descriptor is held). The type is move-only: copying is
 * deleted so two objects never refer to the same descriptor.
 */
class PerfEvent {
 public:
  /* Store a copy of the event name. fd_ starts out as -1; Init() is what sets
   * the event up with the kernel. Taking the name by const reference lets
   * callers pass const strings and temporaries as well as mutable lvalues. */
  explicit PerfEvent(const std::string& name) : name_(name) {}

  PerfEvent(const PerfEvent& other) = delete;
  PerfEvent& operator=(const PerfEvent&) = delete;

  /* Take over the name and descriptor of `other`, leaving it with fd_ == -1 */
  PerfEvent(PerfEvent&& other) noexcept
      : name_(std::move(other.name_)), fd_(std::exchange(other.fd_, -1)) {}

  /* Exchange state with `other` instead of overwriting it. Overwriting fd_
   * would silently drop a descriptor this object already holds; after the
   * swap that descriptor lives in `other` and is handled when `other` is
   * destroyed (the destructor is defined out of line — presumably it closes
   * fd_; confirm). Self-assignment is a no-op. */
  PerfEvent& operator=(PerfEvent&& other) noexcept {
    if (this != &other) {
      std::swap(fd_, other.fd_);
      std::swap(name_, other.name_);
    }
    return *this;
  }

  ~PerfEvent();

  /* Setup perf events with the Linux Kernel, attaches perf to this process
   * using perf_event_open(2) */
  void Init();

  /* Stop incrementing hardware counters for this event */
  void Disable() const;

  /* Start counting hardware event from this point on */
  void Enable() const;

  /* Zero out the counts for this event */
  void Reset() const;

  /* Returns the counter value for this event as read from the kernel; on non
   * supported platforms this always returns zero */
  uint64_t ReadCounter() const;

 private:
  /* Name of the event */
  std::string name_;

  /* Descriptor for the event; -1 means no descriptor is held */
  int fd_ = -1;
};
/*
 * Owns a collection of PerfEvent objects and exposes them as one unit:
 * Configure() once, then bracket the region of interest with Enable() and
 * Disable().
 */
class PerfProfiler {
 public:
  /* Set up all the requested events; each one is tracked as its own
   * PerfEvent */
  void Configure(std::vector<std::string>& event_names);

  /* Begin counting events from this point */
  void Enable();

  /* Stop counting and fill the caller supplied container with the delta
   * computed against the start count values since the last Enable() */
  void Disable(perf_counters_t& vals);

 private:
  /* Helper taking a start and an end count; defined out of line */
  uint64_t CalcDelta(uint64_t start, uint64_t end) const;

  /* Helpers that start / stop counting; defined out of line */
  void StartCounting() const;
  void StopCounting() const;

  /* One entry per configured event */
  std::vector<PerfEvent> events_;

  /* Start count values kept on a stack.
   * NOTE(review): presumably one entry is pushed per Enable() so calls can
   * nest — confirm against the implementation. */
  std::stack<perf_counters_t> start_values_;
};
} // namespace torch::profiler::impl::linux_perf
|