File: combined_traceback.cpp

package info (click to toggle)
pytorch-cuda 2.6.0%2Bdfsg-7
  • links: PTS, VCS
  • area: contrib
  • in suites: forky, sid, trixie
  • size: 161,620 kB
  • sloc: python: 1,278,832; cpp: 900,322; ansic: 82,710; asm: 7,754; java: 3,363; sh: 2,811; javascript: 2,443; makefile: 597; ruby: 195; xml: 84; objc: 68
file content (174 lines) | stat: -rw-r--r-- 5,866 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
#include <torch/csrc/profiler/python/combined_traceback.h>
#include <torch/csrc/python_headers.h>
#include <torch/csrc/utils/pybind.h>
#include <torch/csrc/utils/pythoncapi_compat.h>
namespace py = pybind11;

namespace torch {
// Locking:
// We need to free PyCodeObjects when ~StackContext runs, but
// CUDACachingAllocator may hold its device lock when ~StackContext runs.

// Because the thread calling the allocator _may_ hold the GIL,
// attempting to lock the GIL in ~StackContext can deadlock:
// T0: GIL Lock -> Call Allocator    ->| Waiting Device Lock
// T1: Call Allocator -> Device Lock ->| Waiting GIL Lock
// Instead the destructor defers freeing stack frames by putting them in
// to_free_frames. We still need a lock to manage this vector, but
// we can ensure an overall lock ordering of GIL -> device_lock ->
// to_free_frames_mutex because ::gather is called outside of the device lock.

namespace {
// Guards every access to to_free_frames. Per the locking note above, it is
// the innermost lock in the GIL -> device_lock -> to_free_frames_mutex order.
static std::mutex to_free_frames_mutex;
// Frames whose code-object references still have to be dropped. release()
// appends to this queue; gather() and freeDeadCapturedTracebackFrames() drain
// it and perform the Py_XDECREF calls.
static std::vector<CapturedTraceback::PyFrame> to_free_frames;
// Implementation of the CapturedTraceback::Python interface on top of the
// CPython C API: captures the current Python stack as (code object,
// last-instruction) pairs, defers releasing those code objects, exposes them
// to GC-style traverse/clear callbacks, and symbolizes them into
// filename / function name / line number records.
struct PythonTraceback : public CapturedTraceback::Python {
  // Captures the Python stack of the calling thread, starting at the frame
  // returned by PyEval_GetFrame() and walking outwards through f_back.
  // Returns an empty vector when the interpreter is not initialized.
  // Also drains to_free_frames, since this is a point where the GIL is held.
  std::vector<CapturedTraceback::PyFrame> gather() override {
    if (!Py_IsInitialized()) {
      return {};
    }
    std::vector<CapturedTraceback::PyFrame> frames;
    py::gil_scoped_acquire acquire;
    // Detach the pending frames while holding the mutex, but drop their
    // references only after the mutex has been released. Dropping the last
    // reference to an object may run arbitrary deallocation code; if that
    // code re-entered release() on this thread it would relock the
    // non-recursive to_free_frames_mutex, and if it released the GIL we
    // would be waiting for the GIL while holding the mutex, which violates
    // the GIL -> device_lock -> to_free_frames_mutex ordering.
    std::vector<CapturedTraceback::PyFrame> pending;
    {
      std::lock_guard<std::mutex> lock(to_free_frames_mutex);
      pending.swap(to_free_frames);
    }
    for (auto& dead : pending) {
      Py_XDECREF(dead.code);
    }
    // PyEval_GetFrame() hands back a borrowed reference, so take our own
    // before walking; every frame obtained from PyFrame_GetBack() is released
    // once its successor has been fetched.
    PyFrameObject* f = PyEval_GetFrame();
    Py_XINCREF(f);
    while (f) {
      // The code reference obtained here stays with the PyFrame entry until
      // it is queued by release() and decref'd by a later drain.
      frames.emplace_back(
          CapturedTraceback::PyFrame{PyFrame_GetCode(f), PyFrame_GetLasti(f)});
      auto f_back = PyFrame_GetBack(f);
      Py_XDECREF(f);
      f = f_back;
    }
    return frames;
  }
  // Queues `frames` for a deferred decref instead of touching Python
  // reference counts here; only to_free_frames_mutex is taken, never the GIL
  // (see the locking note at the top of the file).
  void release(std::vector<CapturedTraceback::PyFrame>& frames) override {
    std::lock_guard<std::mutex> lock(to_free_frames_mutex);
    to_free_frames.insert(to_free_frames.end(), frames.begin(), frames.end());
  }
  using void_visitproc = int (*)(void* self, void* arg);
  // Applies `visit` (with `arg`) to the code object of every frame via
  // Py_VISIT, which returns early from this function if the visitor yields a
  // non-zero value. Returns 0 when all frames were visited.
  int traverse(
      std::vector<CapturedTraceback::PyFrame>& frames,
      void_visitproc visit,
      void* arg) override {
    for (auto& f : frames) {
      Py_VISIT(f.code);
    }
    return 0;
  }
  // Drops the code-object reference of every frame with Py_CLEAR, leaving the
  // `code` fields null. Always returns 0.
  int clear(std::vector<CapturedTraceback::PyFrame>& frames) override {
    for (auto& f : frames) {
      Py_CLEAR(f.code);
    }
    return 0;
  }
  // Appends one traceback per entry of `to_symbolize` to `result`. Each
  // traceback starts with the frame described by the code object itself
  // (filename, function name, line computed from `lasti`), followed by any
  // extra frames reported by
  // torch._inductor.codecache.PyCodeCache.stack_frames_for_code when that
  // attribute chain is available.
  void appendSymbolized(
      const std::vector<CapturedTraceback::PyFrame>& to_symbolize,
      SymbolizedTracebacks& result) override {
    py::gil_scoped_acquire acquire;
    py::str line_s = "line";
    py::str name_s = "name";
    py::str filename_s = "filename";

    auto torch = py::module::import("torch");
    // Stays a null py::object unless both torch._inductor and its codecache
    // attribute exist; checked with .ptr() below.
    py::object stack_frames_for_code;
    if (py::hasattr(torch, "_inductor")) {
      py::object inductor = torch.attr("_inductor");
      if (py::hasattr(inductor, "codecache")) {
        stack_frames_for_code = inductor.attr("codecache")
                                    .attr("PyCodeCache")
                                    .attr("stack_frames_for_code");
      }
    }
    for (const auto& f : to_symbolize) {
      auto f_code = (PyCodeObject*)f.code;
      py::handle filename = f_code->co_filename;
      py::handle funcname = f_code->co_name;
      auto lineno = PyCode_Addr2Line(f_code, f.lasti);
      // Tracebacks store indices into all_frames rather than the frames
      // themselves, so record the index the new frame is about to occupy.
      result.tracebacks.emplace_back();
      result.tracebacks.back().push_back(result.all_frames.size());
      result.all_frames.emplace_back(unwind::Frame{
          py::cast<std::string>(filename),
          py::cast<std::string>(funcname),
          (uint64_t)lineno});
      // find all the additional frames associated with inductor generated
      // code
      if (stack_frames_for_code.ptr()) {
        py::object extra = stack_frames_for_code(filename, lineno);
        if (!extra.is_none()) {
          for (py::handle h : extra) {
            result.tracebacks.back().push_back(result.all_frames.size());
            result.all_frames.emplace_back(unwind::Frame{
                py::cast<std::string>(h[filename_s]),
                py::cast<std::string>(h[name_s]),
                py::cast<uint64_t>(h[line_s])});
          }
        }
      }
    }
  }
};

} // namespace

// Symbolizes the given tracebacks and returns one Python object per input
// entry, in input order. Each object is a list of dicts with the keys "name",
// "filename" and "line". Inputs that point at the same CapturedTraceback are
// symbolized once and share the same list object in the result.
std::vector<py::object> py_symbolize(
    std::vector<CapturedTraceback*>& to_symbolize) {
  // Collapse repeated pointers so that each distinct traceback is symbolized
  // (and converted to Python objects) only once. `slot_of` maps a traceback
  // to its position in `distinct`.
  std::unordered_map<CapturedTraceback*, uint64_t> slot_of;
  std::vector<CapturedTraceback*> distinct;
  for (CapturedTraceback* tb : to_symbolize) {
    const bool first_time = slot_of.insert({tb, distinct.size()}).second;
    if (first_time) {
      distinct.push_back(tb);
    }
  }
  auto symbolized = symbolize(distinct);

  // One dict per symbolized frame; tracebacks refer to these by index.
  py::str line_key = "line";
  py::str name_key = "name";
  py::str filename_key = "filename";
  std::vector<py::dict> frame_dicts;
  for (const auto& frame : symbolized.all_frames) {
    py::dict entry;
    entry[name_key] = frame.funcname;
    entry[filename_key] = frame.filename;
    entry[line_key] = frame.lineno;
    frame_dicts.emplace_back(std::move(entry));
  }

  // One list per distinct traceback, built from the shared frame dicts.
  std::vector<py::object> lists_by_slot;
  for (const auto& frame_indices : symbolized.tracebacks) {
    py::list frames_list;
    for (const auto& index : frame_indices) {
      frames_list.append(frame_dicts.at(index));
    }
    lists_by_slot.push_back(std::move(frames_list));
  }

  // Fan the per-slot lists back out to the original (possibly repeated)
  // input order.
  std::vector<py::object> result;
  result.reserve(to_symbolize.size());
  for (CapturedTraceback* tb : to_symbolize) {
    const auto slot = slot_of.at(tb);
    result.push_back(lists_by_slot.at(slot));
  }
  return result;
}

void freeDeadCapturedTracebackFrames() {
  std::lock_guard<std::mutex> lock(to_free_frames_mutex);
  for (CapturedTraceback::PyFrame f : to_free_frames) {
    Py_XDECREF(f.code);
  }
  to_free_frames.clear();
}

// Registers a PythonTraceback instance as a Python unwinder with
// CapturedTraceback.
void installCapturedTracebackPython() {
  // Heap-allocated and handed to addPythonUnwinder; this function never
  // deletes it.
  auto* python_unwinder = new PythonTraceback();
  CapturedTraceback::addPythonUnwinder(python_unwinder);
}

} // namespace torch