File: perf.h

package info (click to toggle)
pytorch 2.6.0%2Bdfsg-8
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 161,672 kB
  • sloc: python: 1,278,832; cpp: 900,322; ansic: 82,710; asm: 7,754; java: 3,363; sh: 2,811; javascript: 2,443; makefile: 597; ruby: 195; xml: 84; objc: 68
file content (101 lines) | stat: -rw-r--r-- 2,584 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
#pragma once

#include <array>
#include <cstdint>
#include <memory>
#include <stack>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>

#include <torch/csrc/profiler/events.h>

#include <c10/util/Exception.h>

namespace torch::profiler::impl::linux_perf {

/*
 * Maximum number of events supported.
 * This stems from the hardware limitation on CPU performance counters, and
 * the fact that time multiplexing is not supported yet.
 * Time multiplexing would involve either scaling the counter values in
 * proportion to the enabled and running time, or running the workload
 * multiple times.
 *
 * Declared `inline` so that every translation unit including this header
 * refers to one and the same constant.
 */
inline constexpr uint8_t MAX_EVENTS = 4;

/*
 * One counter sample: the event value plus two time fields.
 * NOTE(review): presumably this mirrors the record the kernel returns when
 * the event's descriptor is read, in which case the field order and widths
 * must not change — confirm against the code that fills it.
 *
 * Every field defaults to zero so that a default-constructed PerfCounter
 * never exposes indeterminate values.
 */
struct PerfCounter {
  uint64_t value = 0; /* The value of the event */
  uint64_t time_enabled = 0; /* for TIME_ENABLED */
  uint64_t time_running = 0; /* for TIME_RUNNING */
};

/*
 * Basic perf event handler for Android and Linux.
 *
 * Move-only object pairing an event name with the file descriptor used for
 * that event. fd_ is -1 while no descriptor is held.
 */
class PerfEvent {
 public:
  /* Create a handler for the event called `name`. The name is copied, so the
   * argument may be const or a temporary. No descriptor is held yet
   * (fd_ == -1). */
  explicit PerfEvent(const std::string& name) : name_(name) {}

  /* Copying is disabled; the class is move-only. */
  PerfEvent(const PerfEvent& other) = delete;
  PerfEvent& operator=(const PerfEvent&) = delete;

  /* Move-assign by exchanging state with `other`.
   * The descriptor previously held by *this is handed to `other` rather than
   * overwritten, so it is not silently dropped: whatever cleanup ~PerfEvent
   * performs (defined out of line) is applied to it when `other` is
   * destroyed. Self-assignment is a harmless no-op. */
  PerfEvent& operator=(PerfEvent&& other) noexcept {
    std::swap(fd_, other.fd_);
    name_.swap(other.name_);
    return *this;
  }

  /* Move-construct: members start from their defaults (empty name,
   * fd_ == -1) and are then exchanged with `other`, which is therefore left
   * holding no descriptor. */
  PerfEvent(PerfEvent&& other) noexcept {
    *this = std::move(other);
  }

  ~PerfEvent();

  /* Setup perf events with the Linux Kernel, attaches perf to this process
   * using perf_event_open(2) */
  void Init();

  /* Stop incrementing hardware counters for this event */
  void Disable() const;

  /* Start counting hardware event from this point on */
  void Enable() const;

  /* Zero out the counts for this event */
  void Reset() const;

  /* Returns the counter value for this event as read from the kernel; on
   * unsupported platforms this always returns zero */
  uint64_t ReadCounter() const;

 private:
  /* Name of the event */
  std::string name_;

  /* File descriptor for this event; -1 means no descriptor is held */
  int fd_ = -1;
};

/*
 * Manages a collection of PerfEvent objects as one unit: Configure() selects
 * the events, Enable() starts a measurement and Disable() reports the counter
 * deltas accumulated since the matching Enable().
 * Only declarations are visible in this header; the member functions are
 * defined out of line.
 */
class PerfProfiler {
 public:
  /* Configure all the events and track them as individual PerfEvent.
   * event_names: names of the events to track.
   * NOTE(review): the parameter is a non-const reference; whether the
   * definition modifies it is not visible here. Presumably the number of
   * names is limited by MAX_EVENTS — confirm in the definition. */
  void Configure(std::vector<std::string>& event_names);

  /* Enable events counting from here */
  void Enable();

  /* Disable counting and fill in the caller supplied container with delta
   * calculated from the start count values since last Enable() */
  void Disable(perf_counters_t&);

 private:
  /* Delta between a start and an end counter reading.
   * NOTE(review): how counter wrap-around (end < start) is handled is decided
   * in the definition, which is not visible here. */
  uint64_t CalcDelta(uint64_t start, uint64_t end) const;
  /* Internal start/stop helpers; presumably they forward to the individual
   * PerfEvent objects in events_ — confirm in the definition. */
  void StartCounting() const;
  void StopCounting() const;

  /* The events being tracked, one PerfEvent each */
  std::vector<PerfEvent> events_;
  /* Saved start count values. NOTE(review): a stack suggests one snapshot is
   * pushed per Enable() and popped by the matching Disable(), allowing nested
   * Enable()/Disable() pairs — confirm in the definition. */
  std::stack<perf_counters_t> start_values_;
};
} // namespace torch::profiler::impl::linux_perf