File: python_tracer.h

package info (click to toggle)
pytorch-cuda 2.6.0%2Bdfsg-7
  • links: PTS, VCS
  • area: contrib
  • in suites: forky, sid, trixie
  • size: 161,620 kB
  • sloc: python: 1,278,832; cpp: 900,322; ansic: 82,710; asm: 7,754; java: 3,363; sh: 2,811; javascript: 2,443; makefile: 597; ruby: 195; xml: 84; objc: 68
file content (60 lines) | stat: -rw-r--r-- 1,808 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
#pragma once

#include <cstdint>
#include <memory>
#include <utility>
#include <vector>

#include <c10/util/ApproximateClock.h>
#include <c10/util/strong_type.h>

#include <torch/csrc/profiler/kineto_shim.h>
#include <torch/csrc/profiler/util.h>

namespace torch::profiler::impl {

class RecordQueue;
struct Result;
namespace python_tracer {

using TraceKey = strong::type<
    uint64_t,
    struct TraceKey_,
    strong::regular,
    strong::hashable,
    strong::ostreamable>;

struct CompressedEvent {
  TraceKey key_;
  uint64_t system_tid_{};
  kineto::DeviceAndResource kineto_info_{};
  c10::time_t enter_t_{};
};

/*
Libtorch does not depend on Python (e.g. cannot #include <Python.h>); however
when we call the profiler from libtorch_python we need the profiler to be able
to ingest the data that we collect from the Python tracer. (`PyEval_SetProfile`)

In order to solve this dependency issue we define a virtual base and a function
to register a getter. The python tracer then implements these functions and
exposes itself by calling `registerTracer` from `torch/csrc/autograd/init.cpp`.
This pattern of registration for faux python dependencies in libtorch is common
in the PyTorch codebase.
*/
struct TORCH_API PythonTracerBase {
  static std::unique_ptr<PythonTracerBase> make(RecordQueue* queue);
  virtual ~PythonTracerBase() = default;

  virtual void stop() = 0;
  virtual void restart() = 0;
  virtual std::vector<std::shared_ptr<Result>> getEvents(
      std::function<c10::time_t(c10::approx_time_t)> time_converter,
      std::vector<CompressedEvent>& enters,
      c10::time_t end_time_ns) = 0;
};

using MakeFn = std::unique_ptr<PythonTracerBase> (*)(RecordQueue*);
TORCH_API void registerTracer(MakeFn make_tracer);
} // namespace python_tracer
} // namespace torch::profiler::impl