File: profiler_edge.cpp

#include <c10/core/Allocator.h>
#include <c10/util/Exception.h>
#include <c10/util/overloaded.h>
#include <torch/csrc/jit/mobile/profiler_edge.h>
#include <string>
#include <vector>

namespace torch {
namespace jit {
namespace mobile {

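// Thread-local pointer to the active edge profiler. At most one
// KinetoEdgeCPUProfiler may be live per thread: the constructor checks this
// invariant and the destructor clears the pointer.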
thread_local KinetoEdgeCPUProfiler* tls_edge_profiler{nullptr};

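// RAII-style profiler guard for the lite interpreter: constructing a
// KinetoEdgeCPUProfiler prepares and enables the Kineto CPU profiler, and the
// destructor disables it and, if a trace file name was given, saves the trace.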
KinetoEdgeCPUProfiler::KinetoEdgeCPUProfiler(
    const torch::jit::mobile::Module& m,
    const std::string& fname,
    const bool report_input_shapes,
    const bool profile_memory,
    const bool with_stack,
    const bool with_flops,
    const bool with_modules)
    : m_(m), trace_file_name_(fname) {
  torch::profiler::impl::ProfilerConfig config(
      torch::profiler::impl::ProfilerState::KINETO,
      report_input_shapes,
      profile_memory,
      with_stack,
      with_flops,
      with_modules);
  torch::autograd::profiler::prepareProfiler(
      config, {torch::autograd::profiler::ActivityType::CPU});
  if (with_modules || with_stack) {
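    // When module hierarchy or stack traces are requested, register a
    // post-processing callback that resolves each event's debug handle into
    // human-readable module-hierarchy / call-stack strings using the
    // module's saved debug information.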
    auto post_processing = [this, with_stack, with_modules](
                               int64_t debug_handle,
                               std::vector<std::string>& jit_stack,
                               std::vector<std::string>& jit_modules) {
      std::string no_debug_info("Model was not saved with debug information");
      if (with_modules) {
        // KinetoEvent's module hierarchy takes a vector of strings, so we
        // construct a temporary vector holding a single string element.
        jit_modules = std::vector<std::string>(
            {this->m_.hasDebugHandles()
                 ? this->m_.getModuleHierarchy(debug_handle)
                 : no_debug_info});
      } else if (with_stack) {
        // KinetoEvent's stack trace takes a vector of strings, so we
        // construct a temporary vector holding a single string element.
        jit_stack = std::vector<std::string>(
            {this->m_.hasDebugHandles() ? this->m_.getCallStack(debug_handle)
                                        : no_debug_info});
      }
    };
    torch::autograd::profiler::enableProfilerWithEventPostProcess(
        config,
        {torch::autograd::profiler::ActivityType::CPU},
        post_processing,
        {at::RecordScope::LITE_INTERPRETER});
  } else {
    torch::autograd::profiler::enableProfiler(
        config,
        {torch::autograd::profiler::ActivityType::CPU},
        {at::RecordScope::LITE_INTERPRETER});
  }
  TORCH_CHECK(
      tls_edge_profiler == nullptr, "Edge profiler is already profiling.");
  tls_edge_profiler = this;
}

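// Forwards a backend allocator event (allocation or free) to the active
// profiler so that backend memory usage shows up in the trace.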
void KinetoEdgeCPUProfiler::recordBackendMemoryEvent(
    void* ptr,
    int64_t alloc_size,
    int64_t total_allocated,
    int64_t total_reserved,
    c10::Device device) {
  c10::reportMemoryUsageToProfiler(
      ptr, alloc_size, total_allocated, total_reserved, device);
}

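// Records a custom event from a delegated backend against the active Kineto
// profiler, tagged with the debug handle so the event can be correlated back
// to the original model source.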
void KinetoEdgeCPUProfiler::recordBackendEvent(
    const int64_t start_time_us,
    const int64_t end_time_us,
    const int64_t debug_handle,
    const std::string& event_name,
    const std::string& backend_name) {
  torch::autograd::profiler::reportBackendEventToActiveKinetoProfiler(
      start_time_us,
      end_time_us,
      debug_handle,
      at::RecordScope::LITE_INTERPRETER,
      event_name,
      backend_name);
}

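// Stops profiling and caches the result so it can be saved by the destructor
// or retrieved later via getProfilerResult(). Must be called at most once.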
const std::unique_ptr<torch::autograd::profiler::ProfilerResult>&
KinetoEdgeCPUProfiler::disableProfiler() {
  TORCH_CHECK(
      !profiler_result_,
      "KinetoEdgeCPUProfiler already disabled. "
      "To get list of events use getProfilerResults()");
  profiler_result_ = torch::autograd::profiler::disableProfiler();
  return profiler_result_;
}

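// Returns the cached result produced by disableProfiler(); it is an error to
// call this before the profiler has been disabled.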
const std::unique_ptr<torch::autograd::profiler::ProfilerResult>&
KinetoEdgeCPUProfiler::getProfilerResult() {
  TORCH_CHECK(
      profiler_result_,
      "KinetoEdgeCPUProfiler has not been disabled. "
      "use disableProfiler() API first, which returns the ProfilerResult.");
  return profiler_result_;
}

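// On destruction, save the trace if a file name was provided. If the caller
// never invoked disableProfiler() explicitly, the profiler is disabled here
// and its result is saved directly.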
KinetoEdgeCPUProfiler::~KinetoEdgeCPUProfiler() {
  if (!trace_file_name_.empty()) {
    if (profiler_result_) {
      profiler_result_->save(trace_file_name_);
    } else {
      torch::autograd::profiler::disableProfiler()->save(trace_file_name_);
    }
  }
  tls_edge_profiler = nullptr;
}

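// Returns the edge profiler active on the current thread, or nullptr if none.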
KinetoEdgeCPUProfiler* getCurrentEdgeProfiler() {
  return tls_edge_profiler;
}
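
// Example usage (an illustrative sketch, not part of this file; assumes a
// mobile Module `module`, e.g. loaded via torch::jit::_load_for_mobile, and
// prepared `inputs`):
//
//   {
//     KinetoEdgeCPUProfiler profiler(
//         module,
//         "/tmp/trace.json", // fname: trace output file
//         false,             // report_input_shapes
//         false,             // profile_memory
//         true,              // with_stack
//         false,             // with_flops
//         true);             // with_modules
//     module.forward(inputs);
//   } // profiler destructor disables profiling and saves the trace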

} // namespace mobile
} // namespace jit
} // namespace torch