1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174
|
#include <torch/csrc/profiler/python/combined_traceback.h>
#include <torch/csrc/python_headers.h>
#include <torch/csrc/utils/pybind.h>
#include <torch/csrc/utils/pythoncapi_compat.h>
namespace py = pybind11;
namespace torch {
// Locking:
// We need to free PyCodeObjects when ~StackContext runs, but
// CUDACachingAllocator may hold its device lock when ~StackContext runs.
// Because the thread calling the allocator _may_ hold the GIL,
// attempting to lock the GIL in ~StackContext can deadlock:
// T0: GIL Lock -> Call Allocator ->| Waiting Device Lock
// T1: Call Allocator -> Device Lock ->| Waiting GIL Lock
// Instead the destructor defers freeing stack frames by putting them in
// to_free_frames. We still need a lock to manage this vector, but
// we can ensure an overall lock ordering of GIL -> device_lock ->
// to_free_frames_mutex because ::gather is called outside of the device lock.
namespace {
// Guards every access to to_free_frames. Per the lock-ordering note above it
// is the innermost lock (GIL -> device_lock -> to_free_frames_mutex).
static std::mutex to_free_frames_mutex;
// Frames handed to PythonTraceback::release() whose code-object references
// have not been dropped yet. Entries are appended by release() and drained
// (Py_XDECREF'd and cleared) by PythonTraceback::gather() and
// freeDeadCapturedTracebackFrames().
static std::vector<CapturedTraceback::PyFrame> to_free_frames;
// Python implementation of the CapturedTraceback::Python hooks: captures the
// interpreter stack, defers reference drops for captured frames, exposes the
// captured code objects to a traverse/clear style visitor, and turns captured
// frames into filename/function/line records.
struct PythonTraceback : public CapturedTraceback::Python {
  // Captures the Python stack of the calling thread, innermost frame first.
  // Returns an empty vector when the interpreter is not initialized.
  // Acquires the GIL for the duration of the call and, as a side effect,
  // drops the references of every frame previously queued by release().
  // Each returned PyFrame holds the result of PyFrame_GetCode (a strong
  // reference per the CPython C API) plus the frame's last instruction index.
  std::vector<CapturedTraceback::PyFrame> gather() override {
    if (!Py_IsInitialized()) {
      return {};
    }
    std::vector<CapturedTraceback::PyFrame> frames;
    py::gil_scoped_acquire acquire;
    // Steal the queued frames while holding the mutex, but drop the
    // references only after the mutex has been released. Destroying a code
    // object can run arbitrary code (e.g. weakref callbacks), which might
    // call back into release()/gather() on this non-recursive mutex or
    // release the GIL while the mutex is still held.
    std::vector<CapturedTraceback::PyFrame> dead_frames;
    {
      std::lock_guard<std::mutex> lock(to_free_frames_mutex);
      dead_frames.swap(to_free_frames);
    }
    for (auto& dead : dead_frames) {
      Py_XDECREF(dead.code);
    }
    // PyEval_GetFrame returns a borrowed reference; take our own so that the
    // loop can uniformly release each frame after stepping to its parent.
    PyFrameObject* f = PyEval_GetFrame();
    Py_XINCREF(f);
    while (f) {
      frames.emplace_back(
          CapturedTraceback::PyFrame{PyFrame_GetCode(f), PyFrame_GetLasti(f)});
      auto f_back = PyFrame_GetBack(f);
      Py_XDECREF(f);
      f = f_back;
    }
    return frames;
  }

  // Queues `frames` so that their code references are dropped later by
  // gather() or freeDeadCapturedTracebackFrames(). No Python API is called
  // here; only to_free_frames_mutex is taken (see the locking note at the top
  // of the file for why the decref is deferred).
  void release(std::vector<CapturedTraceback::PyFrame>& frames) override {
    std::lock_guard<std::mutex> lock(to_free_frames_mutex);
    to_free_frames.insert(to_free_frames.end(), frames.begin(), frames.end());
  }

  using void_visitproc = int (*)(void* self, void* arg);

  // Visits the code object of every frame. Py_VISIT relies on the parameter
  // names `visit` and `arg` and returns early with the visitor's result when
  // it is non-zero; otherwise 0 is returned.
  int traverse(
      std::vector<CapturedTraceback::PyFrame>& frames,
      void_visitproc visit,
      void* arg) override {
    for (auto& f : frames) {
      Py_VISIT(f.code);
    }
    return 0;
  }

  // Drops the code reference of every frame and nulls the pointer
  // (Py_CLEAR). Always returns 0.
  int clear(std::vector<CapturedTraceback::PyFrame>& frames) override {
    for (auto& f : frames) {
      Py_CLEAR(f.code);
    }
    return 0;
  }

  // Appends one traceback per entry of `to_symbolize` to `result`. Each
  // traceback starts with the frame's own (filename, function name, line)
  // record; when torch._inductor.codecache is available, any extra frames
  // reported by PyCodeCache.stack_frames_for_code for that file/line are
  // appended to the same traceback. Acquires the GIL.
  void appendSymbolized(
      const std::vector<CapturedTraceback::PyFrame>& to_symbolize,
      SymbolizedTracebacks& result) override {
    py::gil_scoped_acquire acquire;
    py::str line_s = "line";
    py::str name_s = "name";
    py::str filename_s = "filename";
    auto torch = py::module::import("torch");
    // Stays a null object when inductor's code cache is not present on the
    // torch module; checked via .ptr() below.
    py::object stack_frames_for_code;
    if (py::hasattr(torch, "_inductor")) {
      py::object inductor = torch.attr("_inductor");
      if (py::hasattr(inductor, "codecache")) {
        stack_frames_for_code = inductor.attr("codecache")
                                    .attr("PyCodeCache")
                                    .attr("stack_frames_for_code");
      }
    }
    for (const auto& f : to_symbolize) {
      auto f_code = (PyCodeObject*)f.code;
      py::handle filename = f_code->co_filename;
      py::handle funcname = f_code->co_name;
      auto lineno = PyCode_Addr2Line(f_code, f.lasti);
      // Start a new traceback whose first entry indexes the frame appended
      // to all_frames right below.
      result.tracebacks.emplace_back();
      result.tracebacks.back().push_back(result.all_frames.size());
      result.all_frames.emplace_back(unwind::Frame{
          py::cast<std::string>(filename),
          py::cast<std::string>(funcname),
          (uint64_t)lineno});
      // find all the additional frames associated with inductor generated
      // code
      if (stack_frames_for_code.ptr()) {
        py::object extra = stack_frames_for_code(filename, lineno);
        if (!extra.is_none()) {
          for (py::handle h : extra) {
            result.tracebacks.back().push_back(result.all_frames.size());
            result.all_frames.emplace_back(unwind::Frame{
                py::cast<std::string>(h[filename_s]),
                py::cast<std::string>(h[name_s]),
                py::cast<uint64_t>(h[line_s])});
          }
        }
      }
    }
  }
};
} // namespace
// Symbolizes the given tracebacks and converts them to Python objects.
// The result has one entry per element of `to_symbolize`, in the same order.
// Each entry is a Python list of dicts with the keys "name", "filename" and
// "line". Pointers that occur more than once in `to_symbolize` are symbolized
// only once and share the same Python list object in the result.
std::vector<py::object> py_symbolize(
    std::vector<CapturedTraceback*>& to_symbolize) {
  // Assign every distinct traceback pointer a slot in `distinct`, in order of
  // first appearance, so repeated inputs do not produce duplicated frames.
  std::unordered_map<CapturedTraceback*, uint64_t> slot_of;
  std::vector<CapturedTraceback*> distinct;
  for (CapturedTraceback* tb : to_symbolize) {
    if (slot_of.count(tb) == 0) {
      slot_of.insert({tb, distinct.size()});
      distinct.push_back(tb);
    }
  }

  auto symbolized = symbolize(distinct);

  py::str name_key("name");
  py::str filename_key("filename");
  py::str line_key("line");

  // One dict per symbolized frame; tracebacks refer to these by index.
  std::vector<py::dict> frame_dicts;
  for (const auto& frame : symbolized.all_frames) {
    py::dict entry;
    entry[name_key] = frame.funcname;
    entry[filename_key] = frame.filename;
    entry[line_key] = frame.lineno;
    frame_dicts.emplace_back(std::move(entry));
  }

  // One Python list per distinct traceback, built from the shared dicts.
  std::vector<py::object> per_distinct;
  for (const auto& frame_indices : symbolized.tracebacks) {
    py::list frames;
    for (const auto& idx : frame_indices) {
      frames.append(frame_dicts.at(idx));
    }
    per_distinct.push_back(std::move(frames));
  }

  // Expand back to the caller's original (possibly repeating) order.
  std::vector<py::object> result;
  result.reserve(to_symbolize.size());
  for (CapturedTraceback* tb : to_symbolize) {
    const auto slot = slot_of.at(tb);
    result.push_back(per_distinct.at(slot));
  }
  return result;
}
void freeDeadCapturedTracebackFrames() {
std::lock_guard<std::mutex> lock(to_free_frames_mutex);
for (CapturedTraceback::PyFrame f : to_free_frames) {
Py_XDECREF(f.code);
}
to_free_frames.clear();
}
// Creates a PythonTraceback and registers it with CapturedTraceback as the
// Python unwinder. The object is heap-allocated and not deleted here.
void installCapturedTracebackPython() {
  auto* python_unwinder = new PythonTraceback();
  CapturedTraceback::addPythonUnwinder(python_unwinder);
}
} // namespace torch
|