File: py_rref.h

package info (click to toggle)
pytorch 1.13.1%2Bdfsg-4
  • links: PTS, VCS
  • area: main
  • in suites: bookworm
  • size: 139,252 kB
  • sloc: cpp: 1,100,274; python: 706,454; ansic: 83,052; asm: 7,618; java: 3,273; sh: 2,841; javascript: 612; makefile: 323; xml: 269; ruby: 185; yacc: 144; objc: 68; lex: 44
file content (83 lines) | stat: -rw-r--r-- 2,939 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
#pragma once

#include <torch/csrc/distributed/rpc/rref_impl.h>
#include <torch/csrc/python_headers.h>
#include <torch/csrc/utils/pybind.h>

namespace torch {
namespace distributed {
namespace rpc {

enum RRefProxyType { RPC_SYNC, RPC_ASYNC, REMOTE };

// Python wrapper of an RRef shared_ptr that supports Python
// pickle and unpickle.
class PYBIND11_EXPORT PyRRef {
 public:
  // The first ctor can only be called while holding GIL. See its implementation
  // for more explanations.
  explicit PyRRef(const py::object& value, const py::object& type_hint);
  explicit PyRRef(c10::intrusive_ptr<RRef> rref);
  ~PyRRef();

  bool isOwner() const;
  bool confirmedByOwner() const;
  WorkerInfo owner() const;
  std::string ownerName() const;
  py::object toHere(
      const float timeoutSeconds =
          torch::distributed::rpc::kUnsetRpcTimeout) const;
  py::object localValue() const;
  std::string str() const;
  py::tuple pickle() const;
  static PyRRef unpickle(const py::tuple& t);
  c10::IValue toIValue() const;
  // Future that is associated with the creation of this RRef on the remote end.
  // This is only used to get the future corresponding to the rref for profiling
  // use cases.
  c10::intrusive_ptr<JitFuture> getFuture() const;
  // Keeps track of the future responsible for profiling owner creation
  // acknowledgement
  c10::intrusive_ptr<JitFuture> getProfilingFuture() const;
  // Sets the future responsible for profiling owner creation acknowledgement.
  // This future is set from python to be a future that returns when profiling
  // callbacks have been run.
  void setProfilingFuture(c10::intrusive_ptr<JitFuture> profilingFuture);

  // create a proxy on this RRef, which can be used to launch RPC on the owner
  // of this RRef to run functions on the object referenced by this RRef.
  py::object createRRefProxy(
      const RRefProxyType& mode,
      float timeoutSeconds = rpc::kUnsetRpcTimeout) const;

  // get the type of the data object referenced by this RRef. Timeout argument
  // is only used in the first invocation of this function as an argument to the
  // RPC to the owner node of the RRef.
  py::object getRRefType(
      float timeout = rpc::kUnsetRpcTimeout,
      bool blocking = true);

  // Run the backward pass with the RRef as the root.
  void backward(int64_t autogradContextId, bool retainGraph);

  // Helper static function to run backward on a given rref.
  static void backward(
      int64_t autogradContextId,
      bool retainGraph,
      const c10::intrusive_ptr<RRef>& rref);

  // Specialization of backward if the rref is an OwnerRRef.
  static void backwardOwnerRRef(
      int64_t autogradContextId,
      bool retainGraph,
      IValue value);

 private:
  c10::intrusive_ptr<RRef> rref_;
  c10::optional<c10::intrusive_ptr<JitFuture>> profilingFuture_;
  c10::optional<py::object> type_;
};

} // namespace rpc
} // namespace distributed
} // namespace torch