File: bucketize_op.cc

package info (click to toggle)
pytorch 1.13.1%2Bdfsg-4
  • links: PTS, VCS
  • area: main
  • in suites: bookworm
  • size: 139,252 kB
  • sloc: cpp: 1,100,274; python: 706,454; ansic: 83,052; asm: 7,618; java: 3,273; sh: 2,841; javascript: 612; makefile: 323; xml: 269; ruby: 185; yacc: 144; objc: 68; lex: 44
file content (77 lines) | stat: -rw-r--r-- 2,324 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
#include "caffe2/operators/bucketize_op.h"

#include "caffe2/core/operator.h"
#include "caffe2/core/tensor.h"

namespace caffe2 {

template <>
bool BucketizeOp<CPUContext>::RunOnDevice() {
  auto& input = Input(X);
  CAFFE_ENFORCE_GE(input.dim(), 1);

  auto N = input.numel();
  auto* output = Output(INDICES, input.sizes(), at::dtype<int32_t>());
  const auto* input_data = input.template data<float>();
  auto* output_data = output->template mutable_data<int32_t>();

  math::Set<int32_t, CPUContext>(output->numel(), 0.0, output_data, &context_);

  for (int64_t pos = 0; pos < N; pos++) {
    // here we assume the boundary values for each feature are sorted
    auto bucket_idx =
        std::lower_bound(
            boundaries_.begin(), boundaries_.end(), input_data[pos]) -
        boundaries_.begin();
    output_data[pos] = bucket_idx;
  }

  return true;
};
REGISTER_CPU_OPERATOR(Bucketize, BucketizeOp<CPUContext>);

OPERATOR_SCHEMA(Bucketize)
    .NumInputs(1)
    .NumOutputs(1)
    .SetDoc(R"DOC(
This operator works as bucketize in tensorflow and digitize
in numpy. It bucketizes the input 'X' based on argument 'boundaries'.
For each value x in input 'data', the operator returns index i given
boundaries[i-1] < x <= boundaries[i].
If values in 'data' are beyond the bounds of boundaries, 0 or
len(boundaries) is returned as appropriate.
The boundaries need to be monotonically increasing.
For example

If data = [2, 4, 1] and boundaries = [0.1, 2.5], then

output = [1, 2, 1]

If data = [[2, 3], [4, 1], [2, 5]] and boundaries = [0.1, 2.5], then

output = [[1, 2], [2, 1], [1, 2]]

)DOC")
    .Input(0, "data", "input tensor")
    .Output(
        0,
        "output",
        "indices of bins given by boundaries to which each value"
        "in data belongs")
    .TensorInferenceFunction([](const OperatorDef& /* def */,
                                const vector<TensorShape>& in) {
      vector<TensorShape> out(in);
      out[0].set_data_type(TensorProto::INT32);
      return out;
    })
    .Arg("boundaries", "bucketization boundaries");

NO_GRADIENT(BucketizeOp);
} // namespace caffe2

using BucketizeInt = caffe2::BucketizeOp<caffe2::CPUContext>;

C10_EXPORT_CAFFE2_OP_TO_C10_CPU(
    Bucketize,
    "_caffe2::Bucketize(Tensor data, float[] boundaries) -> Tensor output",
    BucketizeInt);