File: unsafe_coalesce.h

package info (click to toggle)
pytorch 1.13.1%2Bdfsg-4
  • links: PTS, VCS
  • area: main
  • in suites: bookworm
  • size: 139,252 kB
  • sloc: cpp: 1,100,274; python: 706,454; ansic: 83,052; asm: 7,618; java: 3,273; sh: 2,841; javascript: 612; makefile: 323; xml: 269; ruby: 185; yacc: 144; objc: 68; lex: 44
file content (70 lines) | stat: -rw-r--r-- 2,533 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
#ifndef CAFFE2_OPERATORS_UNSAFE_COALESCE_OP_H_
#define CAFFE2_OPERATORS_UNSAFE_COALESCE_OP_H_

#include "caffe2/core/context.h"
#include "caffe2/core/export_caffe2_op_to_c10.h"
#include <c10/util/irange.h>
#include "caffe2/core/operator.h"


namespace caffe2 {

/**
 * Packs all input tensors into one contiguous float buffer and rebinds the
 * per-input outputs as views into that buffer.
 *
 * Layout of the outputs as used by RunOnDevice():
 *   - Output(OutputSize() - 1) is the coalesced buffer, sized to hold the sum
 *     of numel() over all inputs.
 *   - Output(i), for each input i, is replaced by an alias of the coalesced
 *     buffer whose storage offset is the running element count of the inputs
 *     before it, resized to the sizes input i had on entry.
 *
 * Only float inputs are accepted; any other dtype fails an enforce check.
 *
 * NOTE(review): the code assumes OutputSize() == InputSize() + 1; that is not
 * checked here and is presumably enforced by the operator schema -- confirm.
 */
template <class Context>
class UnsafeCoalesceOp final : public Operator<Context> {
 public:
  USE_OPERATOR_CONTEXT_FUNCTIONS;
  using Operator<Context>::Operator;

  /**
   * Runs the coalescing described on the class.
   *
   * @return always true; failures are reported through the enforce macros.
   */
  bool RunOnDevice() override {
    // Single pass over the inputs: reject anything that is not float (the
    // only supported type for now) and add up how many elements the
    // coalesced buffer has to hold.
    size_t total_elems = 0;
    for (const auto i : c10::irange(InputSize())) {
      CAFFE_ENFORCE(
          Input(i).dtype().template Match<float>(),
          "Must only coalesce float type, error at input: ",
          i);
      total_elems += Input(i).numel();
    }

    // The last output is the backing buffer for every other output.
    auto* coalesced =
        Output(OutputSize() - 1, total_elems, at::dtype<float>());
    auto coalesced_data = coalesced->template mutable_data<float>();

    size_t next_offset = 0;
    for (const auto i : c10::irange(InputSize())) {
      const auto elem_count = Input(i).numel();
      // Snapshot the shape before Output(i) is rebound below; if Output(i)
      // and Input(i) alias each other the input is no longer readable then.
      auto original_sizes = Input(i).sizes().vec();
      auto input_data = Input(i).template data<float>();

      // Claim this input's slot in the coalesced buffer. The running offset
      // advances whether or not a copy turns out to be needed.
      const size_t slot_offset = next_offset;
      next_offset += elem_count;
      auto slot_data = coalesced_data + slot_offset;

      // Nothing to do when the input already lives exactly in its slot.
      if (input_data == slot_data) {
        continue;
      }

      // Compare the storage base pointers (data pointer minus storage
      // offset): an input that shares storage with the coalesced buffer but
      // sits at a different position is rejected.
      CAFFE_ENFORCE_NE(
          input_data - Input(i).unsafeGetTensorImpl()->storage_offset(),
          coalesced_data -
              Output(OutputSize() - 1)
                  ->unsafeGetTensorImpl()
                  ->storage_offset(),
          "Tensors used in UnsafeCoalesce operator cannot share storage, unless it's inplace operation");

      context_.CopyItemsSameDevice(
          Input(i).dtype(), elem_count, input_data, slot_data);

      // Rebinding Output(i) may make Input(i) release its data when the two
      // alias each other. That is safe on a GPU because the copy above
      // happens-before the free, but it is worth keeping in mind.
      OperatorBase::SetOutputTensor(i, coalesced->Alias());
      Output(i)->unsafeGetTensorImpl()->set_storage_offset(slot_offset);
      Output(i)->Resize(original_sizes);
    }
    return true;
  }
};
} // namespace caffe2

#endif /* CAFFE2_OPERATORS_UNSAFE_COALESCE_OP_H_ */