1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176
|
#include "heatmap_max_keypoint_op.h"
#include "caffe2/utils/eigen_utils.h"
namespace caffe2 {
namespace {
// Bind the HeatmapMaxKeypoint operator name to its float/CPU implementation.
REGISTER_CPU_OPERATOR(
HeatmapMaxKeypoint,
HeatmapMaxKeypointOp<float, CPUContext>);
// Inputs (as enforced by RunOnDevice below):
//   0: heatmaps, 4-D (#rois, #keypoints, size, size) with size >= 2
//   1: boxes, 2-D (#rois, >= 4); columns 0..3 are read as x0, y0, x1, y1
// Output:
//   0: keypoints (#rois, 4, #keypoints); dim 1 holds x, y, the refined
//      heatmap score and the softmax probability of the maximum
OPERATOR_SCHEMA(HeatmapMaxKeypoint).NumInputs(2).NumOutputs(1);
// No gradient is registered for this operator.
SHOULD_NOT_DO_GRADIENT(HeatmapMaxKeypoint);
} // namespace
/**
 * Locates the peak of every keypoint heatmap with sub-pixel precision.
 *
 * Mask R-CNN applies bicubic upscaling to a heatmap before taking its
 * maximum. Bicubic upscaling is computationally expensive, so this operator
 * instead approximates the heatmap around its discrete maximum with a Taylor
 * expansion up to the quadratic terms and uses the stationary point of that
 * quadratic as the refined peak.
 *
 * Inputs:
 *   0: heatmaps, 4-D (N, keypoint_count, S, S) with S >= 2.
 *   1: boxes, 2-D (N, >= 4); columns 0..3 are read as x0, y0, x1, y1.
 * Output:
 *   0: keypoints, (N, 4, keypoint_count). Along dim 1: x, y, the refined
 *      heatmap score, and the softmax probability of the discrete maximum
 *      (0 when should_output_softmax_ is false).
 *
 * Returns true on success; shape violations are reported via CAFFE_ENFORCE.
 **/
template <>
bool HeatmapMaxKeypointOp<float, CPUContext>::RunOnDevice() {
  // Largest sub-pixel offset (in heatmap cells) accepted from the quadratic
  // fit; larger offsets are scaled back onto this bound.
  constexpr float kMaxDelta = 1.5f;

  const auto& heatmaps_in = Input(0);
  const auto& bboxes_in = Input(1);

  CAFFE_ENFORCE_EQ(heatmaps_in.dim(), 4);
  const int N = heatmaps_in.dim32(0);
  const int keypoint_count = heatmaps_in.dim32(1);
  const int heatmap_size = heatmaps_in.dim32(2);
  CAFFE_ENFORCE_GE(heatmap_size, 2); // at least 2x2 for approx
  CAFFE_ENFORCE_EQ(heatmaps_in.dim32(2), heatmaps_in.dim32(3));
  CAFFE_ENFORCE_EQ(bboxes_in.dim(), 2);
  CAFFE_ENFORCE_EQ(bboxes_in.dim32(0), N);
  CAFFE_ENFORCE_GE(bboxes_in.dim32(1), 4);

  const int num_heatmaps = N * keypoint_count;
  const int num_pixels = heatmap_size * heatmap_size;

  // Wrap inputs in Eigen: one row per (box, keypoint) heatmap.
  Eigen::Map<const ERArrXXf> heatmaps(
      heatmaps_in.data<float>(), num_heatmaps, num_pixels);
  Eigen::Map<const ERArrXXf> bboxes(
      bboxes_in.data<float>(), bboxes_in.dim32(0), bboxes_in.dim32(1));

  // Resize and wrap outputs in Eigen
  auto* keypoints_out = Output(0, {N, 4, keypoint_count}, at::dtype<float>());
  Eigen::Map<ERArrXXf> keypoints(
      keypoints_out->mutable_data<float>(), N, 4 * keypoint_count);

  // finding max value first (only maxCoeff() is vectorized, not
  // maxCoeff(&index)), then find the index (equalness check is also fast)
  const EArrXf maxScores = heatmaps.rowwise().maxCoeff();
  // Zero-initialized so that a row in which no element compares equal to the
  // reported maximum (e.g. because of NaNs) still yields an in-range index.
  EArrXi maxIndices = EArrXi::Zero(num_heatmaps);
  for (int r = 0; r < num_heatmaps; r++) {
    const float maxScore = maxScores[r];
    for (int c = 0; c < num_pixels; c++) {
      if (heatmaps(r, c) == maxScore) {
        maxIndices[r] = c;
        break;
      }
    }
  }

  // Softmax probability of each heatmap's maximum. Only that single entry of
  // the softmax is ever written to the output, so the full probability map is
  // not materialized. Subtracting the row maximum before exponentiating keeps
  // exp() from overflowing; the numerator becomes exp(0) == 1.
  // The softmax is expensive to compute, so it is skipped (left at zero) when
  // should_output_softmax_ is not set.
  EArrXf maxProbs = EArrXf::Zero(num_heatmaps);
  if (should_output_softmax_) {
    for (int r = 0; r < num_heatmaps; r++) {
      maxProbs[r] = 1.0f / (heatmaps.row(r) - maxScores[r]).exp().sum();
    }
  }

  // Populate outputs
  for (int k = 0; k < N; k++) { // For each box, even skipped
    const float x0 = bboxes(k, 0);
    const float y0 = bboxes(k, 1);
    // Box extents are clamped to at least 1 before scaling.
    const float xLen = std::max(bboxes(k, 2) - bboxes(k, 0), 1.0f);
    const float yLen = std::max(bboxes(k, 3) - bboxes(k, 1), 1.0f);

    // Extract max keypoints and probabilities from heatmaps
    for (int j = 0; j < keypoint_count; j++) {
      const int heatmap_index = k * keypoint_count + j;
      const int maxIndex = maxIndices[heatmap_index];
      const float maxScore = maxScores[heatmap_index];
      const int maxY = maxIndex / heatmap_size;
      const int maxX = maxIndex - heatmap_size * maxY;

      assert(heatmaps(heatmap_index, maxIndex) == maxScore);
      ERArrXXf fmax = ERArrXXf::Zero(3, 3);

      // initialize fmax values of local 3x3 grid
      // when 3x3 grid going out-of-bound, mirroring around center
      for (int y = -1; y <= 1; y++) {
        for (int x = -1; x <= 1; x++) {
          int xx = x - 2 * (x + maxX >= heatmap_size) + 2 * (x + maxX < 0);
          int yy = y - 2 * (y + maxY >= heatmap_size) + 2 * (y + maxY < 0);
          assert((xx + maxX < heatmap_size) && (xx + maxX >= 0));
          assert((yy + maxY < heatmap_size) && (yy + maxY >= 0));
          const int coord_index = (yy + maxY) * heatmap_size + xx + maxX;
          fmax(y + 1, x + 1) = heatmaps(heatmap_index, coord_index);
        }
      }

      // b = -f'(0), A = f''(0) Hessian matrix
      // (central finite differences on the 3x3 neighbourhood)
      EVecXf b(2);
      b << -(fmax(1, 2) - fmax(1, 0)) / 2, -(fmax(2, 1) - fmax(0, 1)) / 2;
      EMatXf A(2, 2);
      A << fmax(1, 0) - 2 * fmax(1, 1) + fmax(1, 2),
          (fmax(2, 2) - fmax(2, 0) - fmax(0, 2) + fmax(0, 0)) / 4,
          (fmax(2, 2) - fmax(2, 0) - fmax(0, 2) + fmax(0, 0)) / 4,
          fmax(0, 1) - 2 * fmax(1, 1) + fmax(2, 1);

      // Solve Ax=b. When the Hessian is (near-)singular the quadratic has no
      // usable stationary point, so the discrete maximum is kept unchanged.
      const float div = A.determinant();
      EVecXf delta = EVecXf::Zero(2);
      float deltaScore = maxScore;
      const bool hessian_singular = std::abs(div) < 1e-4f;
      if (!hessian_singular) {
        delta = A.ldlt().solve(b);
        // clip delta if going out-of-range of 3x3 grid
        if (std::abs(delta(0)) > kMaxDelta || std::abs(delta(1)) > kMaxDelta) {
          const float larger_delta =
              std::max(std::abs(delta(0)), std::abs(delta(1)));
          delta(0) = delta(0) / larger_delta * kMaxDelta;
          delta(1) = delta(1) / larger_delta * kMaxDelta;
        }
        // Value of the quadratic model at the (possibly clipped) offset.
        deltaScore = fmax(1, 1) - b.transpose() * delta +
            1.0 / 2.0 * delta.transpose() * A * delta;
      }
      assert(std::abs(delta(0)) <= kMaxDelta);
      assert(std::abs(delta(1)) <= kMaxDelta);

      // Map the refined heatmap coordinate (cell centre + offset) into the
      // box, then store x, y, score and probability for this keypoint.
      keypoints(k, 0 * keypoint_count + j) =
          x0 + (0.5 + maxX + delta(0)) * xLen / heatmap_size;
      keypoints(k, 1 * keypoint_count + j) =
          y0 + (0.5 + maxY + delta(1)) * yLen / heatmap_size;
      keypoints(k, 2 * keypoint_count + j) = deltaScore;
      // maxProbs stays 0 when should_output_softmax_ is false.
      keypoints(k, 3 * keypoint_count + j) = maxProbs[heatmap_index];
    }
  }

  return true;
}
} // namespace caffe2
// Single-identifier alias for the float/CPU specialization. The template-id
// contains a comma, which the preprocessor would otherwise treat as a macro
// argument separator in the export macro below.
using HeatmapMaxKeypointOpFloatCPU =
caffe2::HeatmapMaxKeypointOp<float, caffe2::CPUContext>;
// Export the operator under the schema string given here.
// NOTE(review): the schema defaults should_output_softmax to True; the
// operator's own default for this argument is set outside this file --
// confirm the two agree.
// clang-format off
C10_EXPORT_CAFFE2_OP_TO_C10_CPU(
HeatmapMaxKeypoint,
"_caffe2::HeatmapMaxKeypoint("
"Tensor heatmaps, "
"Tensor bboxes_in, "
"bool should_output_softmax = True"
") -> Tensor keypoints",
HeatmapMaxKeypointOpFloatCPU);
// clang-format on
|