File: numpy_tile_op.h

package info (click to toggle)
pytorch 1.13.1%2Bdfsg-4
  • links: PTS, VCS
  • area: main
  • in suites: bookworm
  • size: 139,252 kB
  • sloc: cpp: 1,100,274; python: 706,454; ansic: 83,052; asm: 7,618; java: 3,273; sh: 2,841; javascript: 612; makefile: 323; xml: 269; ruby: 185; yacc: 144; objc: 68; lex: 44
file content (120 lines) | stat: -rw-r--r-- 3,800 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
#ifndef CAFFE2_OPERATORS_NUMPY_TILE_OP_H_
#define CAFFE2_OPERATORS_NUMPY_TILE_OP_H_

#include "caffe2/core/common_omp.h"
#include "caffe2/core/context.h"
#include "caffe2/core/logging.h"
#include "caffe2/core/operator.h"
#include "caffe2/utils/math.h"
#include "c10/util/irange.h"

namespace caffe2 {

// Tiles a tensor along every one of its axes.
//
// Inputs:
//   Input(0): the tensor to tile.
//   Input(1): `repeats`, a 1-d int64 tensor with one non-negative entry per
//             axis of Input(0).
// Output:
//   Output(0): a tensor whose size along axis i is the input's size along
//              axis i multiplied by repeats[i].
template <class Context>
class NumpyTileOp : public Operator<Context> {
 public:
  USE_OPERATOR_CONTEXT_FUNCTIONS;
  template <class... Args>
  explicit NumpyTileOp(Args&&... args)
      : Operator<Context>(std::forward<Args>(args)...) {}
  ~NumpyTileOp() {}

  // Validates `repeats`, then tiles the input one axis at a time.
  // Returns true on success; validation failures are reported through the
  // CAFFE_ENFORCE_* macros.
  bool RunOnDevice() override {
    const auto& input = Input(0);
    const auto& repeats = Input(1);

    // Check that the `repeats` tensor has the correct rank, has a number of
    // elements equal to the number of axes of `input`.
    CAFFE_ENFORCE_EQ(repeats.dim(), 1, "repeats input must be a 1-d tensor");
    CAFFE_ENFORCE_EQ(
        repeats.numel(),
        input.dim(),
        "repeats input have the same"
        " number of elements as `inputs` has dimensions.");
    const int64_t* repeats_data = repeats.template data<int64_t>();
    // Every repeat count must be non-negative (0 yields an empty axis).
    // NOLINTNEXTLINE(clang-diagnostic-sign-compare)
    for (const auto i : c10::irange(repeats.numel())) {
      CAFFE_ENFORCE_GE(repeats_data[i], 0);
    }

    auto* output = Output(0);

    // Alternate inputs and outputs between two buffers. Repeatedly apply the
    // Tile kernel along each axis. Then copy out the resulting data into the
    // output tensor.
    Tensor* src = &buffer;
    Tensor* dst = output;
    src->CopyFrom(input);
    vector<int64_t> output_dims(input.sizes().vec());
    for (const auto i : c10::irange(repeats.numel())) {
      // A repeat count of 1 leaves this axis unchanged; skip the copy.
      if (repeats_data[i] == 1) {
        continue;
      }
      // size up to (and not including) axis
      const auto outer_dim = src->size_to_dim(i);
      // size from axis up
      const auto inner_dim = src->size_from_dim(i);

      // Temporarily view dst as a 2-d tensor: one row per outer slab, each
      // row holding `repeats_data[i]` copies of the inner slab.
      dst->Resize(outer_dim, inner_dim * repeats_data[i]);

      /**
       * How this works:
       * Imagine a 2D tensor (matrix) of size 3x10, tiled 2 times.
       * - Tiling along axis 0 (row) means copying the entire 3x10 Matrix 2
       * times. outer_dim = 1, inner_dim = 30 (outer_dim is presumably the
       * empty product here; DoTile would copy nothing if it were 0).
       * - Tiling along axis 1 (column) means copying each row 2 times, then
       * proceed to the next row, until the end. outer_dim = 3, inner_dim = 10.
       */
      const char* src_data = static_cast<const char*>(src->raw_data());
      char* dst_data = static_cast<char*>(dst->raw_mutable_data(src->dtype()));

      DoTile(
          src->dtype(),
          src->itemsize(),
          outer_dim,
          inner_dim,
          repeats_data[i],
          src_data,
          dst_data);

      // Restore the real N-d shape now that axis i has been expanded.
      output_dims[i] *= repeats_data[i];
      dst->Reshape(output_dims);

      // The freshly written tensor becomes the source for the next axis.
      std::swap(src, dst);
    }

    // NB: because we have the swap at the end of the above loop, our real
    // result tensor is going to live in *src when we reach this line
    // whether we entered the loop or not :)
    if (output != src) {
      output->CopyFrom(*src);
    }

    return true;
  }

 private:
  // Copies `outer_dim` consecutive slabs of `inner_dim` items from
  // `input_data` to `output_data`, writing each slab `num_tiles` times
  // back-to-back before moving on to the next slab.
  //
  // All extents are 64-bit so that neither the arguments nor the byte stride
  // (`inner_dim * item_size`) are narrowed to `int`, which could overflow for
  // large tensors.
  //
  // meta:        element type descriptor forwarded to the context's copy.
  // item_size:   size of one element in bytes.
  // outer_dim:   number of slabs to process.
  // inner_dim:   number of elements per slab.
  // num_tiles:   number of copies of each slab to emit.
  // input_data:  start of the source bytes.
  // output_data: start of the destination bytes.
  void DoTile(
      const TypeMeta meta,
      int64_t item_size,
      int64_t outer_dim,
      int64_t inner_dim,
      int64_t num_tiles,
      const char* input_data,
      char* output_data) {
    // Byte length of one slab; loop-invariant, computed once in 64-bit.
    const int64_t slab_bytes = inner_dim * item_size;
    for (const auto i : c10::irange(outer_dim)) {
      (void)i; // Suppress unused variable warning
      for (const auto t : c10::irange(num_tiles)) {
        (void)t; // Suppress unused variable warning
        context_.CopyItemsSameDevice(meta, inner_dim, input_data, output_data);
        output_data += slab_bytes;
      }
      input_data += slab_bytes;
    }
  }

  // Scratch tensor used as the second of the two ping-pong buffers in
  // RunOnDevice (the other being the output tensor).
  Tensor buffer{Context::GetDeviceType()};
};

} // namespace caffe2

#endif // CAFFE2_OPERATORS_NUMPY_TILE_OP_H_