1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404
|
/*
* Copyright (c) 2018 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to
* deal in the Software without restriction, including without limitation the
* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
* sell copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include "OpticalFlow.h"
#include "GaussianPyramidHalf.h"
#include "Scharr.h"
#include "Utils.h"
namespace arm_compute
{
namespace test
{
namespace validation
{
namespace reference
{
namespace
{
using KeyPointArray = std::vector<KeyPoint>;
using InternalKeyPointArray = std::vector<InternalKeyPoint>;
// Constants used for Lucas-Kanade Algorithm
constexpr int W_BITS = 14;
constexpr float D0 = 1 << W_BITS;
constexpr float DETERMINANT_THRESHOLD = 1.0e-07f;
constexpr float EIGENVALUE_THRESHOLD = 1.0e-04f;
constexpr float FLT_SCALE = 1.0f / (1 << 20);
// Creates an InternalKeyPointArray for tracking non-integral pixel coordinates
InternalKeyPointArray create_internal_keypoints(const KeyPointArray &keypoints)
{
InternalKeyPointArray internal_keypoints;
for(auto keypoint : keypoints)
{
InternalKeyPoint internal_keypoint;
internal_keypoint.x = static_cast<float>(keypoint.x);
internal_keypoint.y = static_cast<float>(keypoint.y);
internal_keypoint.tracking_status = static_cast<bool>(keypoint.tracking_status);
internal_keypoints.push_back(internal_keypoint);
}
return internal_keypoints;
}
// Scale tracked points based on Pyramid level
void scale_tracked_points(size_t level, size_t num_levels, bool use_initial_estimate,
InternalKeyPointArray &old_points_internal, InternalKeyPointArray &new_points_internal,
const KeyPointArray &old_points, const KeyPointArray &new_points_estimates)
{
if(level == num_levels - 1) // lowest resolution
{
const float scale = std::pow(SCALE_PYRAMID_HALF, level);
for(size_t i = 0; i < old_points.size(); ++i)
{
old_points_internal.at(i).x = old_points.at(i).x * scale;
old_points_internal.at(i).y = old_points.at(i).y * scale;
old_points_internal.at(i).tracking_status = true;
InternalKeyPoint keypoint_to_track;
if(use_initial_estimate)
{
keypoint_to_track.x = new_points_estimates.at(i).x * scale;
keypoint_to_track.y = new_points_estimates.at(i).y * scale;
keypoint_to_track.tracking_status = (new_points_estimates.at(i).tracking_status == 1);
}
else
{
keypoint_to_track.x = old_points_internal.at(i).x;
keypoint_to_track.y = old_points_internal.at(i).y;
keypoint_to_track.tracking_status = true;
}
new_points_internal.at(i) = keypoint_to_track;
}
}
else
{
for(size_t i = 0; i < old_points.size(); ++i)
{
old_points_internal.at(i).x /= SCALE_PYRAMID_HALF;
old_points_internal.at(i).y /= SCALE_PYRAMID_HALF;
new_points_internal.at(i).x /= SCALE_PYRAMID_HALF;
new_points_internal.at(i).y /= SCALE_PYRAMID_HALF;
}
}
}
bool is_invalid_keypoint(const InternalKeyPoint &keypoint, const ValidRegion &valid_region, size_t window_dimension)
{
const int half_window = window_dimension / 2;
const int x = std::floor(keypoint.x);
const int y = std::floor(keypoint.y);
return (x - half_window < valid_region.start(0)) || (x + half_window >= valid_region.end(0) - 1) || (y - half_window < valid_region.start(1)) || (y + half_window >= valid_region.end(1) - 1);
}
template <typename T>
constexpr int INT_ROUND(T x, int n)
{
return (x + (1 << (n - 1))) >> n;
}
// Return the bilinear value at a specified coordinate with different border modes
template <typename T>
int bilinear_interpolate(const SimpleTensor<T> &in, Coordinates id, float wx, float wy, BorderMode border_mode, T constant_border_value, int scale)
{
const int level = id.x();
const int idy = id.y();
const float dx = wx;
const float dy = wy;
const float dx_1 = 1.0f - dx;
const float dy_1 = 1.0f - dy;
const T border_value = constant_border_value;
id.set(0, level);
id.set(1, idy);
const T tl = tensor_elem_at(in, id, border_mode, border_value);
id.set(0, level + 1);
id.set(1, idy);
const T tr = tensor_elem_at(in, id, border_mode, border_value);
id.set(0, level);
id.set(1, idy + 1);
const T bl = tensor_elem_at(in, id, border_mode, border_value);
id.set(0, level + 1);
id.set(1, idy + 1);
const T br = tensor_elem_at(in, id, border_mode, border_value);
// weights
const int w00 = roundf(dx_1 * dy_1 * D0);
const int w01 = roundf(dx * dy_1 * D0);
const int w10 = roundf(dx_1 * dy * D0);
const int w11 = D0 - w00 - w01 - w10;
return static_cast<int>(INT_ROUND(tl * w00 + tr * w01 + bl * w10 + br * w11, scale));
}
template <typename T>
std::vector<int> compute_derivative(const SimpleTensor<T> &input, const InternalKeyPoint &keypoint,
BorderMode border_mode, uint8_t constant_border_value, size_t window_dimension, int scale)
{
std::vector<int> bilinear_values;
const int half_window = window_dimension / 2;
float keypoint_int_x = 0;
float keypoint_int_y = 0;
const float wx = std::modf(keypoint.x, &keypoint_int_x);
const float wy = std::modf(keypoint.y, &keypoint_int_y);
Coordinates tl_window(static_cast<int>(keypoint_int_x) - half_window, static_cast<int>(keypoint_int_y) - half_window);
Coordinates br_window(static_cast<int>(keypoint_int_x) + half_window, static_cast<int>(keypoint_int_y) + half_window);
for(int y = tl_window.y(); y <= br_window.y(); ++y)
{
for(int x = tl_window.x(); x <= br_window.x(); ++x)
{
bilinear_values.push_back(bilinear_interpolate(input, Coordinates(x, y), wx, wy, border_mode, static_cast<T>(constant_border_value), scale));
}
}
return bilinear_values;
}
std::tuple<float, float, float> compute_spatial_gradient_matrix(const std::vector<int> &bilinear_ix, const std::vector<int> &bilinear_iy)
{
ARM_COMPUTE_ERROR_ON(bilinear_ix.size() != bilinear_iy.size());
int iA11 = 0;
int iA12 = 0;
int iA22 = 0;
for(size_t i = 0; i < bilinear_ix.size(); ++i)
{
int ixval = bilinear_ix[i];
int iyval = bilinear_iy[i];
iA11 += ixval * ixval;
iA12 += ixval * iyval;
iA22 += iyval * iyval;
}
return std::make_tuple(iA11 * FLT_SCALE, iA12 * FLT_SCALE, iA22 * FLT_SCALE);
}
std::tuple<double, double> compute_temporal_gradient_vector(const std::vector<int> &bilinear_it_old,
const std::vector<int> &bilinear_it_new,
const std::vector<int> &bilinear_ix,
const std::vector<int> &bilinear_iy)
{
ARM_COMPUTE_ERROR_ON(bilinear_ix.size() != bilinear_iy.size());
ARM_COMPUTE_ERROR_ON(bilinear_it_old.size() != bilinear_it_new.size());
int ib1 = 0;
int ib2 = 0;
for(size_t i = 0; i < bilinear_ix.size(); ++i)
{
int ixval = bilinear_ix[i];
int iyval = bilinear_iy[i];
int ival = bilinear_it_old[i];
int jval = bilinear_it_new[i];
const int diff = jval - ival;
ib1 += diff * ixval;
ib2 += diff * iyval;
}
const double b1 = ib1 * FLT_SCALE;
const double b2 = ib2 * FLT_SCALE;
return std::make_tuple(b1, b2);
}
} // namespace
template <typename T>
std::vector<KeyPoint> optical_flow(const SimpleTensor<T> &old_input, const SimpleTensor<T> &new_input,
const OpticalFlowParameters ¶ms, size_t num_levels,
const std::vector<KeyPoint> &old_points, const std::vector<KeyPoint> &new_points_estimates,
BorderMode border_mode, uint8_t constant_border_value)
{
const int filter_size = 3; // scharr filter size
const size_t max_iterations = 1000; // fixed by kernel
const size_t window_dimension = params.window_dimension;
const size_t num_iterations = (params.termination == Termination::TERM_CRITERIA_EPSILON) ? max_iterations : params.num_iterations;
KeyPointArray new_points(old_points.size());
InternalKeyPointArray old_points_internal = create_internal_keypoints(old_points);
InternalKeyPointArray new_points_internal = create_internal_keypoints(new_points_estimates);
SimpleTensor<int16_t> scharr_gx;
SimpleTensor<int16_t> scharr_gy;
// Create pyramids
std::vector<SimpleTensor<T>> old_pyramid = gaussian_pyramid_half(old_input, border_mode, constant_border_value, num_levels);
std::vector<SimpleTensor<T>> new_pyramid = gaussian_pyramid_half(new_input, border_mode, constant_border_value, num_levels);
// Iterate over each level of the pyramid
for(size_t idx = num_levels; idx > 0; --idx)
{
const size_t level = idx - 1;
// Calculate scharr gradients
std::tie(scharr_gx, scharr_gy) = scharr<int16_t, T>(old_pyramid[level], filter_size, border_mode, constant_border_value, GradientDimension::GRAD_XY);
scale_tracked_points(level, num_levels, params.use_initial_estimate, old_points_internal, new_points_internal, old_points, new_points_estimates);
// Calculate valid region based on image dimensions of current pyramid level
const ValidRegion valid_region = shape_to_valid_region(old_pyramid[level].shape(), (border_mode == BorderMode::UNDEFINED), BorderSize(filter_size / 2));
for(size_t i = 0; i < old_points.size(); ++i)
{
InternalKeyPoint &old_keypoint = old_points_internal.at(i);
InternalKeyPoint &new_keypoint = new_points_internal.at(i);
// Helper function for untracking keypoints when on the lowest pyramid level (high resolution)
const auto untrack_keypoint = [&](bool predicate)
{
if(predicate && (level == 0))
{
new_keypoint.tracking_status = false;
return true;
}
return predicate;
};
if(!old_keypoint.tracking_status)
{
continue;
}
// Check if tracked coordinate is outside image coordinate
if(untrack_keypoint(is_invalid_keypoint(old_keypoint, valid_region, window_dimension)))
{
continue;
}
// Compute spatial derivative
std::vector<int> bilinear_ix = compute_derivative(scharr_gx, old_keypoint, border_mode, constant_border_value, window_dimension, W_BITS);
std::vector<int> bilinear_iy = compute_derivative(scharr_gy, old_keypoint, border_mode, constant_border_value, window_dimension, W_BITS);
float A11 = 0.f;
float A12 = 0.f;
float A22 = 0.f;
std::tie(A11, A12, A22) = compute_spatial_gradient_matrix(bilinear_ix, bilinear_iy);
// Calculate criteria for lost tracking : Matrix A is invertible
// 1. The determinant of the matrix is less than DETERMINANT_THRESHOLD
// 2. The minimum eigenvalue of the matrix is less than EIGENVALUE_THRESHOLD
const float trace_A = A11 + A22;
const float determinant = A11 * A22 - A12 * A12;
const float discriminant = (trace_A * trace_A) - 4.0f * (determinant);
const float eigenvalue_A = (trace_A - std::sqrt(discriminant)) / 2.0f;
// Divide by window_dimension squared to reduce the floating point accummulation error
const float eigenvalue = eigenvalue_A / (window_dimension * window_dimension);
// Check if it is a good point to track
if(untrack_keypoint(eigenvalue < EIGENVALUE_THRESHOLD || determinant < DETERMINANT_THRESHOLD))
{
continue;
}
float prev_delta_x = 0.f;
float prev_delta_y = 0.f;
for(size_t j = 0; j < num_iterations; ++j)
{
// Check if tracked coordinate is outside image coordinate
if(untrack_keypoint(is_invalid_keypoint(new_keypoint, valid_region, window_dimension)))
{
break;
}
// Compute temporal derivative
std::vector<int> bilinear_it_old = compute_derivative(old_pyramid[level], old_keypoint, border_mode, constant_border_value, window_dimension, W_BITS - 5);
std::vector<int> bilinear_it_new = compute_derivative(new_pyramid[level], new_keypoint, border_mode, constant_border_value, window_dimension, W_BITS - 5);
double b1 = 0.f;
double b2 = 0.f;
std::tie(b1, b2) = compute_temporal_gradient_vector(bilinear_it_old, bilinear_it_new, bilinear_ix, bilinear_iy);
// Compute motion vector -> A^-1 * -b
const float delta_x = (A12 * b2 - A22 * b1) / determinant;
const float delta_y = (A12 * b1 - A11 * b2) / determinant;
// Update the new position
new_keypoint.x += delta_x;
new_keypoint.y += delta_y;
const float magnitude_squared = delta_x * delta_x + delta_y * delta_y;
// Check if termination criteria is EPSILON and if it is satisfied
if(magnitude_squared <= params.epsilon && (params.termination == Termination::TERM_CRITERIA_EPSILON || params.termination == Termination::TERM_CRITERIA_BOTH))
{
break;
}
// Check convergence analyzing the previous delta
if(j > 0 && (std::fabs(delta_x + prev_delta_x) < 0.01f && std::fabs(delta_y + prev_delta_y) < 0.01f))
{
new_keypoint.x -= delta_x * SCALE_PYRAMID_HALF;
new_keypoint.y -= delta_y * SCALE_PYRAMID_HALF;
break;
}
prev_delta_x = delta_x;
prev_delta_y = delta_y;
}
}
}
// Copy optical flow coordinates to output vector
for(size_t i = 0; i < old_points.size(); ++i)
{
const InternalKeyPoint &new_keypoint = new_points_internal.at(i);
new_points.at(i).x = roundf(new_keypoint.x);
new_points.at(i).y = roundf(new_keypoint.y);
new_points.at(i).tracking_status = new_keypoint.tracking_status ? 1 : 0;
}
return new_points;
}
template std::vector<KeyPoint> optical_flow(const SimpleTensor<uint8_t> &old_input, const SimpleTensor<uint8_t> &new_input,
const OpticalFlowParameters ¶ms, size_t num_levels,
const std::vector<KeyPoint> &old_points, const std::vector<KeyPoint> &new_points_estimates,
BorderMode border_mode, uint8_t constant_border_value);
} // namespace reference
} // namespace validation
} // namespace test
} // namespace arm_compute
|