File: lengths_tile_op.cc

package info (click to toggle)
pytorch 1.13.1%2Bdfsg-4
  • links: PTS, VCS
  • area: main
  • in suites: bookworm
  • size: 139,252 kB
  • sloc: cpp: 1,100,274; python: 706,454; ansic: 83,052; asm: 7,618; java: 3,273; sh: 2,841; javascript: 612; makefile: 323; xml: 269; ruby: 185; yacc: 144; objc: 68; lex: 44
file content (97 lines) | stat: -rw-r--r-- 2,821 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
#include "caffe2/operators/lengths_tile_op.h"

namespace caffe2 {

// CPU specialization of LengthsTile.
//
// Writes slice i of DATA (taken along the outer-most dimension) LENGTHS[i]
// times, in order, into the output tensor. The output has the same shape as
// DATA except that its first dimension is the sum of all lengths.
//
// Enforced preconditions:
//   - LENGTHS is 1-D,
//   - DATA is at least 1-D,
//   - LENGTHS has exactly one entry per outer-most slice of DATA,
//   - every entry of LENGTHS is non-negative.
//
// Returns true when the output has been fully written.
template <>
bool LengthsTileOp<CPUContext>::RunOnDevice() {
  auto& data = Input(DATA);
  auto& lengths = Input(LENGTHS);
  auto* output = Output(0);

  CAFFE_ENFORCE_EQ(lengths.dim(), 1, "LENGTHS must be 1-D");
  CAFFE_ENFORCE_GE(data.dim(), 1, "DATA should be at least 1-D");
  CAFFE_ENFORCE_EQ(lengths.numel(), data.size(0));

  lengths_host_.CopyFrom(lengths); // sync copy
  const auto lengths_size = lengths_host_.numel();
  const auto* lengths_data = lengths_host_.data<int32_t>();

  // Validate every length and accumulate the output row count BEFORE the
  // output is sized. Checking inside the copy loop is too late: a negative
  // entry shrinks the sum, so earlier positive entries would already have
  // written past the end of an undersized output buffer by the time the
  // negative entry is reached. The sum is kept in 64 bits so that many large
  // int32 lengths cannot wrap around.
  int64_t total_length = 0;
  for (int64_t i = 0; i < lengths_size; ++i) {
    const auto length = lengths_data[i];
    CAFFE_ENFORCE_GE(length, 0);
    total_length += length;
  }

  auto shape = data.sizes().vec();
  shape[0] = total_length;
  output->Resize(shape);

  // One "block" is a single outer-most slice of DATA, measured in bytes, so
  // the copy below is independent of the element type.
  auto block_bytesize = data.size_from_dim(1) * data.dtype().itemsize();
  auto src = static_cast<const char*>(data.raw_data());
  auto out = static_cast<char*>(output->raw_mutable_data(data.dtype()));

  for (int64_t i = 0; i < lengths_size; ++i) {
    const auto length = lengths_data[i];
    // Emit the current source block `length` times, then move on to the next
    // source block. All lengths were verified to be non-negative above.
    for (int32_t j = 0; j < length; ++j) {
      context_.CopyBytesSameDevice(block_bytesize, src, out);
      out += block_bytesize;
    }
    src += block_bytesize;
  }
  return true;
}

// Register the CPU implementation, LengthsTileOp<CPUContext>, under the
// operator name "LengthsTile".
REGISTER_CPU_OPERATOR(LengthsTile, LengthsTileOp<CPUContext>);

// Schema for LengthsTile: exactly two inputs (DATA, LENGTHS) and one output.
// The text passed to SetDoc/Input/Output below is the operator's
// documentation; keep the example in sync with the implementation.
OPERATOR_SCHEMA(LengthsTile)
    .NumInputs(2)
    .NumOutputs(1)
    .SetDoc(R"DOC(
Given DATA tensor of rank r >= 1, and LENGTHS tensor of rank 1, duplicate each
entry of the outer-most dimension of DATA according to LENGTHS, and concatenate
them in an output tensor of rank r.

Example:
  DATA  = [
      [1.0, 1.2],
      [2.3, 3.4],
      [4.5, 5.7],
      [6.8, 7.9],
  ]
  LENGTHS = [0, 1, 3, 2]
  OUTPUT = [
      [2.3, 3.4],
      [4.5, 5.7],
      [4.5, 5.7],
      [4.5, 5.7],
      [6.8, 7.9],
      [6.8, 7.9],
  ]
)DOC")
    // Input 0: the tensor whose outer-most slices are repeated.
    .Input(
        0,
        "DATA",
        "Tensor of rank r >= 1. First dimension must be equal to the size of "
        "lengths")
    // Input 1: per-slice repeat counts, one entry per outer-most slice of DATA.
    .Input(1, "LENGTHS", "Tensor of int32 lengths of rank 1")
    .Output(0, "OUTPUT", "Tensor of rank r");

// Gradient maker for LengthsTile.
//
// Emits a single "LengthsSum" operator that takes the gradient of the
// LengthsTile output together with the forward-pass LENGTHS input and
// produces the gradient with respect to DATA. No gradient is produced for
// LENGTHS.
class GetLengthsTileGradient : public GradientMakerBase {
  using GradientMakerBase::GradientMakerBase;

  vector<OperatorDef> GetGradientDefs() override {
    // The forward operator must have exactly its two inputs: DATA and LENGTHS.
    CAFFE_ENFORCE_EQ(def_.input_size(), 2);

    // Gradient of the forward output, plus input 1: the lengths that were
    // used to repeat DATA in the forward pass.
    const vector<string> grad_op_inputs{GO(0), I(1)};
    // Only the gradient on "DATA" (forward input 0) is of interest.
    const vector<string> grad_op_outputs{GI(0)};

    return SingleGradientDef("LengthsSum", "", grad_op_inputs, grad_op_outputs);
  }
};
// Use GetLengthsTileGradient as the gradient maker for the LengthsTile
// operator.
REGISTER_GRADIENT(LengthsTile, GetLengthsTileGradient);
} // namespace caffe2