File: data_flow.h

package info (click to toggle)
pytorch-cuda 2.6.0%2Bdfsg-7
  • links: PTS, VCS
  • area: contrib
  • in suites: forky, sid, trixie
  • size: 161,620 kB
  • sloc: python: 1,278,832; cpp: 900,322; ansic: 82,710; asm: 7,754; java: 3,363; sh: 2,811; javascript: 2,443; makefile: 597; ruby: 195; xml: 84; objc: 68
file content (90 lines) | stat: -rw-r--r-- 3,625 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
#pragma once

#include <memory>

#include <ATen/core/TensorBody.h>
#include <c10/core/TensorImpl.h>
#include <c10/macros/Macros.h>
#include <c10/util/strong_type.h>

namespace torch::profiler::impl {

// Identity is a complex concept in PyTorch. A Tensor might not have a
// an associated storage, multiple Tensors might share the same underlying
// storage, the storage of a Tensor might change over time, etc.
//
// For the purpose of profiling we're mostly interested in data flow
// analysis. As a result, we can take an expansive view of identity:
// Tensors share an ID if they share a TensorImpl or storage data.
//
// This identity equality is transitive; If Tensors T0 and T1 share a storage
// S0 and T1 later points to a different storage S1 then all Tensors which
// point to either S0 or S1 are considered to have the same identity. (Since
// profiler cannot reason beyond that.)
//
// The profiler will handle lifetime analysis to ensure that identities do
// not run afoul of the ABA problem. This does, however, mean that identities
// can only be assigned when memory profiling is enabled.
using TensorID = strong::type<size_t, struct TensorID_, strong::regular>;

// Uniquely identifies an allocation. (Generally a StorageImpl's data ptr.)
using AllocationID = strong::type<
    size_t,
    struct StorageID_,
    strong::ordered,
    strong::regular,
    strong::hashable>;

// We use a Tensor's TensorImpl adress and StorageImpl data start to build the
// data flow graph. We do not hold an owning reference so we wrap them in strong
// types to prevent direct access.
using TensorImplAddress = strong::type<
    const c10::TensorImpl*,
    struct TensorImplAddress_,
    strong::regular,
    strong::hashable,
    strong::boolean>;

using StorageImplData = strong::type<
    const void*,
    struct StorageImplData_,
    strong::regular,
    strong::hashable,
    strong::boolean>;

// ============================================================================
// == weak_intrusive_ptr and the ABA problem for TensorImpl* ==================
// ============================================================================
// Tracking `TensorImpl`s is an important part of identity tracking, because
// a Tensor might change storage; however when it does we want to retain the
// fact that the old and new storage belong to the same logical Tensor. We
// cannot take an owning reference to the Tensor because that would change
// program semantics by extending the lifetime of the Tensor. However if we
// store a raw TensorImpl* pointer the TensorImpl might be deleted and a new
// TensorImpl might be created that reuses the address. (ABA problem)
//
// Fortunately, there is a feature of `c10::intrusive_ptr` that we can use to
// prevent address reuse for the duration of profiling: the weak intrusive ptr.
// When a Tensor's refcount reaches zero but there are outstanding weak
// references (`weakcount_ > 0`) it will free the underlying managed resources
// by calling `target_->release_resources()`, but it will not call `delete`.
// (Instead, `delete` is called when the last weak reference is destroyed.)
// This means that we can safely use address identity to track `TensorImpls`.
class WeakTensor {
 public:
  explicit WeakTensor(const at::Tensor& t) : weak_self_(t.getIntrusivePtr()) {}

  auto get() const {
    return TensorImplAddress{weak_self_._unsafe_get_target()};
  }

 private:
  c10::weak_intrusive_ptr<c10::TensorImpl> weak_self_;
};

struct Result;

void calculateUniqueTensorIDs(
    std::vector<std::shared_ptr<Result>>& sorted_results);

} // namespace torch::profiler::impl