#include "caffe2/operators/flexible_top_k.h"
#include "caffe2/proto/caffe2_pb.h"
namespace caffe2 {
namespace {
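// Comparator for the (value, index) pairs stored in the priority queue
// below. A pair orders "before" another when its value is larger or, on a
// tie, when its index is smaller; under std::priority_queue this yields a
// min-heap on value whose top is the smallest retained value (with ties
// broken toward the larger index, so lower indices survive eviction).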
template <typename T>
struct ValueCmp {
bool operator()(
const std::pair<T, int64_t>& lhs,
      const std::pair<T, int64_t>& rhs) const {
return (
lhs.first > rhs.first ||
(lhs.first == rhs.first && lhs.second < rhs.second));
}
};
} // namespace
template <typename T, class Context>
bool FlexibleTopKOp<T, Context>::RunOnDevice() {
auto& input = Input(0);
auto& k = Input(1);
const T* input_data = input.template data<T>();
const int64_t* k_data = k.template data<int64_t>();
  // Flatten the input to a 2D view: [prod(dims[0..n-2]), dims[n-1]]
CAFFE_ENFORCE_GT(input.dim(), 0);
vector<int64_t> input_dims = input.sizes().vec();
vector<int64_t> linear_shape = {
      size_to_dim_(static_cast<int>(input_dims.size()) - 1, input_dims),
      input_dims.back()};
  CAFFE_ENFORCE_EQ(
      linear_shape[0],
      k.numel(),
      "first n-1 dims of input data and K do not match.");
int64_t output_size = 0;
for (int64_t i = 0; i < linear_shape[0]; ++i) {
CAFFE_ENFORCE(
linear_shape[1] >= k_data[i],
"k should not be greater than last dim, error at index ",
i,
", with value: ",
k_data[i]);
CAFFE_ENFORCE(
k_data[i] > 0,
"k should be greater than 0, error at index ",
i,
", with value: ",
k_data[i]);
output_size += k_data[i];
}
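  // Allocate flat 1-D outputs sized by the total number of selected elements
  // across all rows.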
auto* values = Output(0, {output_size}, at::dtype<T>());
auto* indices = Output(1, {output_size}, at::dtype<int64_t>());
T* values_data = values->template mutable_data<T>();
int64_t* indices_data = indices->template mutable_data<int64_t>();
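  // Running offset into the flat outputs; each row advances it by its own k.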
int64_t output_offset = 0;
  // For each row, select its top-k elements while tracking their indices.
for (int64_t i = 0; i < linear_shape[0]; ++i) {
    // Build a min-heap whose elements are (value, idx) pairs; the top of
    // the heap holds the smallest value currently retained.
std::priority_queue<
std::pair<T, int64_t>,
std::vector<std::pair<T, int64_t>>,
ValueCmp<T>>
PQ;
int64_t k_ = k_data[i];
for (int64_t j = 0; j < linear_shape[1]; ++j) {
const T value = input_data[i * linear_shape[1] + j];
      if (PQ.size() < static_cast<size_t>(k_) || value > PQ.top().first) {
        PQ.push(std::make_pair(value, j));
      }
      if (PQ.size() > static_cast<size_t>(k_)) {
        PQ.pop();
      }
}
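    // The heap now holds this row's top k_ (value, index) pairs; popping
    // yields them in ascending value order, so fill this row's output slot
    // from the back to emit values in descending order.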
for (int64_t j = 0; j < k_; ++j) {
auto& pqElem = PQ.top();
values_data[output_offset + k_ - j - 1] = pqElem.first;
indices_data[output_offset + k_ - j - 1] = pqElem.second;
PQ.pop();
}
output_offset += k_;
}
return true;
}
template <typename T, class Context>
bool FlexibleTopKGradientOp<T, Context>::RunOnDevice() {
auto& original_input = Input(0);
auto& k = Input(1);
auto& values = Input(2);
auto& indices = Input(3);
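  // Per GetFlexibleTopKGradient below, Input(2) receives the gradient of the
  // forward values output and Input(3) the forward indices output.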
const int64_t* k_data = k.template data<int64_t>();
const T* values_data = values.template data<T>();
const int64_t* indices_data = indices.template data<int64_t>();
  // Resize the output tensor to the shape of original_input and
  // zero-initialize it.
CAFFE_ENFORCE_GT(original_input.dim(), 0);
vector<int64_t> original_dims = original_input.sizes().vec();
auto* output = Output(0, original_dims, at::dtype<T>());
T* output_data = output->template mutable_data<T>();
math::Set<T, Context>(
output->numel(), static_cast<T>(0), output_data, &context_);
int64_t index_offset = 0;
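  // Scatter each incoming gradient value back to the last-dimension position
  // its top-k element came from; all other positions stay zero.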
for (int64_t i = 0; i < k.numel(); ++i) {
    // offset of this row within output_data
int64_t output_offset = i * original_dims.back();
for (int64_t j = 0; j < k_data[i]; ++j) {
int64_t index = indices_data[index_offset + j];
T value = values_data[index_offset + j];
output_data[output_offset + index] = value;
}
index_offset += k_data[i];
}
return true;
}
REGISTER_CPU_OPERATOR(FlexibleTopK, FlexibleTopKOp<float, CPUContext>);
REGISTER_CPU_OPERATOR(
FlexibleTopKGradient,
FlexibleTopKGradientOp<float, CPUContext>);
OPERATOR_SCHEMA(FlexibleTopK)
.NumInputs(2)
.NumOutputs(2)
.SetDoc(R"DOC(
Given two tensors: X and K,
retrieve the top K[..., 1] elements from X on the last dimension.
X is an input tensor of shape [a_1, a_2, ..., a_n, r].
K is an input tensor of shape [a_1, a_2, ..., a_n, 1],
where for each element, r >= K[..., 1] > 0.
Returns two outputs:
- Flattened values tensor of shape [ \sum_i K[i, 1] ], which contains the
values of the top K[..., 1] elements along the last dimension.
- Flattened indices tensor of shape [ \sum_i K[i, 1] ], which contains the
indices of the top K[..., 1] elements within the last dimension of X.
These two outputs should be used together with the input K, so that it is
clear which indices in X were picked for each row.
Given two equivalent values, this operator uses the indices along the last
dimension as a tiebreaker; that is, the element with the lower index appears
first.
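For example, given X = [[1, 2, 3], [4, 5, 6]] (shape [2, 3]) and
K = [[2], [1]] (shape [2, 1]), the outputs are values = [3, 2, 6] and
indices = [2, 1, 2]: row 0 contributes its top-2 values in descending order,
row 1 its top-1.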
)DOC")
.Input(0, "X", "Tensor of shape [a_1, a_2, ..., a_n, r]")
.Input(1, "K", "Tensor of shape [a_1, a_2, ..., a_n, 1]")
.Output(
0,
"Flatten values",
"Tensor of shape [ \\sum_i K[i, 1] ] containing"
" top K[..., 1] values from the input tensor")
.Output(
    1,
    "Flatten indices",
    "Tensor of shape [ \\sum_i K[i, 1] ] containing the indices "
    "of the top K[..., 1] elements within the last dimension of the input");
OPERATOR_SCHEMA(FlexibleTopKGradient).NumInputs(4).NumOutputs(1);
class GetFlexibleTopKGradient : public GradientMakerBase {
using GradientMakerBase::GradientMakerBase;
vector<OperatorDef> GetGradientDefs() override {
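    // Bind the gradient op's inputs: the original input X (for its shape),
    // K, the gradient w.r.t. the values output, and the forward indices
    // output; the single output is the gradient w.r.t. X.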
return SingleGradientDef(
"FlexibleTopKGradient",
"",
vector<string>{I(0), I(1), GO(0), O(1)},
vector<string>{GI(0)});
}
};
REGISTER_GRADIENT(FlexibleTopK, GetFlexibleTopKGradient);
} // namespace caffe2