File: py_rref.h

package info (click to toggle)
pytorch-cuda 2.6.0%2Bdfsg-7
  • links: PTS, VCS
  • area: contrib
  • in suites: forky, sid, trixie
  • size: 161,620 kB
  • sloc: python: 1,278,832; cpp: 900,322; ansic: 82,710; asm: 7,754; java: 3,363; sh: 2,811; javascript: 2,443; makefile: 597; ruby: 195; xml: 84; objc: 68
file content (81 lines) | stat: -rw-r--r-- 2,965 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
#pragma once

#include <torch/csrc/distributed/rpc/rref_impl.h>
#include <torch/csrc/python_headers.h>
#include <torch/csrc/utils/pybind.h>

namespace torch::distributed::rpc {

// Mode selector passed to PyRRef::createRRefProxy().
// NOTE(review): the enumerator names presumably mirror the rpc_sync /
// rpc_async / remote call styles of the RPC API -- confirm against the code
// that consumes the proxy.
// The enum is deliberately left unscoped and without a fixed underlying type,
// hence the lint suppression below.
// NOLINTNEXTLINE(performance-enum-size)
enum RRefProxyType {
  RPC_SYNC,
  RPC_ASYNC,
  REMOTE,
};

// Python wrapper around a c10::intrusive_ptr<RRef>. Besides exposing RRef
// operations through py::object based signatures, it supports Python pickle
// and unpickle (see pickle() / unpickle()).
class PYBIND11_EXPORT PyRRef {
 public:
  // The first ctor can only be called while holding GIL. See its implementation
  // for more explanations.
  explicit PyRRef(const py::object& value, const py::object& type_hint);
  // Wraps an already existing RRef handle.
  explicit PyRRef(c10::intrusive_ptr<RRef> rref);
  PyRRef(const PyRRef&) = default;
  // User-provided destructor, defined out of line.
  ~PyRRef();

  // Queries about the wrapped RRef and the worker that owns it.
  // NOTE(review): presumably thin forwards to rref_ -- confirm in the
  // implementation.
  bool isOwner() const;
  bool confirmedByOwner() const;
  WorkerInfo owner() const;
  std::string ownerName() const;

  // Returns the referenced value as a Python object. `timeoutSeconds` defaults
  // to rpc::kUnsetRpcTimeout.
  // NOTE(review): presumably the unset sentinel means "use the default RPC
  // timeout" -- confirm in the implementation.
  py::object toHere(float timeoutSeconds = rpc::kUnsetRpcTimeout) const;
  // Returns a Python object for the value held by this RRef.
  // NOTE(review): presumably only valid on the owner -- confirm.
  py::object localValue() const;
  // String representation of this RRef.
  std::string str() const;

  // Pickle support: pickle() produces a tuple and unpickle() rebuilds a PyRRef
  // from such a tuple.
  py::tuple pickle() const;
  static PyRRef unpickle(const py::tuple& t);

  // Converts this RRef into an IValue.
  c10::IValue toIValue() const;

  // Future that is associated with the creation of this RRef on the remote end.
  // This is only used to get the future corresponding to the rref for profiling
  // use cases.
  c10::intrusive_ptr<JitFuture> getFuture() const;
  // Keeps track of the future responsible for profiling owner creation
  // acknowledgement.
  c10::intrusive_ptr<JitFuture> getProfilingFuture() const;
  // Sets the future responsible for profiling owner creation acknowledgement.
  // This future is set from python to be a future that returns when profiling
  // callbacks have been run.
  void setProfilingFuture(c10::intrusive_ptr<JitFuture> profilingFuture);

  // Creates a proxy on this RRef, which can be used to launch RPC on the owner
  // of this RRef to run functions on the object referenced by this RRef.
  // `mode` selects the proxy flavour; `timeoutSeconds` defaults to
  // rpc::kUnsetRpcTimeout.
  py::object createRRefProxy(
      const RRefProxyType& mode,
      float timeoutSeconds = rpc::kUnsetRpcTimeout) const;

  // Gets the type of the data object referenced by this RRef. Timeout argument
  // is only used in the first invocation of this function as an argument to the
  // RPC to the owner node of the RRef.
  // NOTE(review): `blocking` presumably chooses between returning the type
  // itself and returning a future for it -- confirm in the implementation.
  py::object getRRefType(
      float timeout = rpc::kUnsetRpcTimeout,
      bool blocking = true);

  // Run the backward pass with the RRef as the root.
  void backward(int64_t autogradContextId, bool retainGraph);

  // Helper static function to run backward on a given rref.
  static void backward(
      int64_t autogradContextId,
      bool retainGraph,
      const c10::intrusive_ptr<RRef>& rref);

  // Specialization of backward if the rref is an OwnerRRef.
  static void backwardOwnerRRef(
      int64_t autogradContextId,
      bool retainGraph,
      IValue value);

 private:
  // The wrapped RRef.
  c10::intrusive_ptr<RRef> rref_;
  // Future exposed through getProfilingFuture() / setProfilingFuture(); empty
  // until one has been set.
  std::optional<c10::intrusive_ptr<JitFuture>> profilingFuture_;
  // Optional Python type object.
  // NOTE(review): presumably the cached result of getRRefType() -- confirm.
  std::optional<py::object> type_;
};

} // namespace torch::distributed::rpc