1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428
|
//----------------------------------------------------------------------
// File: kd_split.cpp
// Programmer: Sunil Arya and David Mount
// Description: Methods for splitting kd-trees
// Last modified: 01/04/05 (Version 1.0)
//----------------------------------------------------------------------
// Copyright (c) 1997-2005 University of Maryland and Sunil Arya and
// David Mount. All Rights Reserved.
//
// This software and related documentation is part of the Approximate
// Nearest Neighbor Library (ANN). This software is provided under
// the provisions of the Lesser GNU Public License (LGPL). See the
// file ../ReadMe.txt for further information.
//
// The University of Maryland (U.M.) and the authors make no
// representations about the suitability or fitness of this software for
// any purpose. It is provided "as is" without express or implied
// warranty.
//----------------------------------------------------------------------
// History:
// Revision 0.1 03/04/98
// Initial release
// Revision 1.0 04/01/05
//----------------------------------------------------------------------
#include "kd_tree.h" // kd-tree definitions
#include "kd_util.h" // kd-tree utilities
#include "kd_split.h" // splitting functions
//----------------------------------------------------------------------
// Constants
//----------------------------------------------------------------------
const double ERR = 0.001; // a small value
const double FS_ASPECT_RATIO = 3.0; // maximum allowed aspect ratio
// in fair split. Must be >= 2.
//----------------------------------------------------------------------
// kd_split - Bentley's standard splitting routine for kd-trees
// Find the dimension of the greatest spread, and split
// just before the median point along this dimension.
//----------------------------------------------------------------------
void kd_split(
ANNpointArray pa, // point array (permuted on return)
ANNidxArray pidx, // point indices
const ANNorthRect &bnds, // bounding rectangle for cell
int n, // number of points
int dim, // dimension of space
int &cut_dim, // cutting dimension (returned)
ANNcoord &cut_val, // cutting value (returned)
int &n_lo) // num of points on low side (returned)
{
// find dimension of maximum spread
cut_dim = annMaxSpread(pa, pidx, n, dim);
n_lo = n/2; // median rank
// split about median
annMedianSplit(pa, pidx, n, cut_dim, cut_val, n_lo);
}
//----------------------------------------------------------------------
// midpt_split - midpoint splitting rule for box-decomposition trees
//
// This is the simplest splitting rule that guarantees boxes
// of bounded aspect ratio. It simply cuts the box with the
// longest side through its midpoint. If there are ties, it
// selects the dimension with the maximum point spread.
//
// WARNING: This routine (while simple) doesn't seem to work
// well in practice in high dimensions, because it tends to
// generate a large number of trivial and/or unbalanced splits.
// Either kd_split(), sl_midpt_split(), or fair_split() are
// recommended, instead.
//----------------------------------------------------------------------
void midpt_split(
ANNpointArray pa, // point array
ANNidxArray pidx, // point indices (permuted on return)
const ANNorthRect &bnds, // bounding rectangle for cell
int n, // number of points
int dim, // dimension of space
int &cut_dim, // cutting dimension (returned)
ANNcoord &cut_val, // cutting value (returned)
int &n_lo) // num of points on low side (returned)
{
int d;
ANNcoord max_length = bnds.hi[0] - bnds.lo[0];
for (d = 1; d < dim; d++) { // find length of longest box side
ANNcoord length = bnds.hi[d] - bnds.lo[d];
if (length > max_length) {
max_length = length;
}
}
ANNcoord max_spread = -1; // find long side with most spread
for (d = 0; d < dim; d++) {
// is it among longest?
if (double(bnds.hi[d] - bnds.lo[d]) >= (1-ERR)*max_length) {
// compute its spread
ANNcoord spr = annSpread(pa, pidx, n, d);
if (spr > max_spread) { // is it max so far?
max_spread = spr;
cut_dim = d;
}
}
}
// split along cut_dim at midpoint
cut_val = (bnds.lo[cut_dim] + bnds.hi[cut_dim]) / 2;
// permute points accordingly
int br1, br2;
annPlaneSplit(pa, pidx, n, cut_dim, cut_val, br1, br2);
//------------------------------------------------------------------
// On return: pa[0..br1-1] < cut_val
// pa[br1..br2-1] == cut_val
// pa[br2..n-1] > cut_val
//
// We can set n_lo to any value in the range [br1..br2].
// We choose split so that points are most evenly divided.
//------------------------------------------------------------------
if (br1 > n/2) n_lo = br1;
else if (br2 < n/2) n_lo = br2;
else n_lo = n/2;
}
//----------------------------------------------------------------------
// sl_midpt_split - sliding midpoint splitting rule
//
// This is a modification of midpt_split, which has the nonsensical
// name "sliding midpoint". The idea is that we try to use the
// midpoint rule, by bisecting the longest side. If there are
// ties, the dimension with the maximum spread is selected. If,
// however, the midpoint split produces a trivial split (no points
// on one side of the splitting plane) then we slide the splitting
// (maintaining its orientation) until it produces a nontrivial
// split. For example, if the splitting plane is along the x-axis,
// and all the data points have x-coordinate less than the x-bisector,
// then the split is taken along the maximum x-coordinate of the
// data points.
//
// Intuitively, this rule cannot generate trivial splits, and
// hence avoids midpt_split's tendency to produce trees with
// a very large number of nodes.
//
//----------------------------------------------------------------------
void sl_midpt_split(
ANNpointArray pa, // point array
ANNidxArray pidx, // point indices (permuted on return)
const ANNorthRect &bnds, // bounding rectangle for cell
int n, // number of points
int dim, // dimension of space
int &cut_dim, // cutting dimension (returned)
ANNcoord &cut_val, // cutting value (returned)
int &n_lo) // num of points on low side (returned)
{
int d;
ANNcoord max_length = bnds.hi[0] - bnds.lo[0];
for (d = 1; d < dim; d++) { // find length of longest box side
ANNcoord length = bnds.hi[d] - bnds.lo[d];
if (length > max_length) {
max_length = length;
}
}
ANNcoord max_spread = -1; // find long side with most spread
for (d = 0; d < dim; d++) {
// is it among longest?
if ((bnds.hi[d] - bnds.lo[d]) >= (1-ERR)*max_length) {
// compute its spread
ANNcoord spr = annSpread(pa, pidx, n, d);
if (spr > max_spread) { // is it max so far?
max_spread = spr;
cut_dim = d;
}
}
}
// ideal split at midpoint
ANNcoord ideal_cut_val = (bnds.lo[cut_dim] + bnds.hi[cut_dim])/2;
ANNcoord min, max;
annMinMax(pa, pidx, n, cut_dim, min, max); // find min/max coordinates
if (ideal_cut_val < min) // slide to min or max as needed
cut_val = min;
else if (ideal_cut_val > max)
cut_val = max;
else
cut_val = ideal_cut_val;
// permute points accordingly
int br1, br2;
annPlaneSplit(pa, pidx, n, cut_dim, cut_val, br1, br2);
//------------------------------------------------------------------
// On return: pa[0..br1-1] < cut_val
// pa[br1..br2-1] == cut_val
// pa[br2..n-1] > cut_val
//
// We can set n_lo to any value in the range [br1..br2] to satisfy
// the exit conditions of the procedure.
//
// if ideal_cut_val < min (implying br2 >= 1),
// then we select n_lo = 1 (so there is one point on left) and
// if ideal_cut_val > max (implying br1 <= n-1),
// then we select n_lo = n-1 (so there is one point on right).
// Otherwise, we select n_lo as close to n/2 as possible within
// [br1..br2].
//------------------------------------------------------------------
if (ideal_cut_val < min) n_lo = 1;
else if (ideal_cut_val > max) n_lo = n-1;
else if (br1 > n/2) n_lo = br1;
else if (br2 < n/2) n_lo = br2;
else n_lo = n/2;
}
//----------------------------------------------------------------------
// fair_split - fair-split splitting rule
//
// This is a compromise between the kd-tree splitting rule (which
// always splits data points at their median) and the midpoint
// splitting rule (which always splits a box through its center.
// The goal of this procedure is to achieve both nicely balanced
// splits, and boxes of bounded aspect ratio.
//
// A constant FS_ASPECT_RATIO is defined. Given a box, those sides
// which can be split so that the ratio of the longest to shortest
// side does not exceed ASPECT_RATIO are identified. Among these
// sides, we select the one in which the points have the largest
// spread. We then split the points in a manner which most evenly
// distributes the points on either side of the splitting plane,
// subject to maintaining the bound on the ratio of long to short
// sides. To determine that the aspect ratio will be preserved,
// we determine the longest side (other than this side), and
// determine how narrowly we can cut this side, without causing the
// aspect ratio bound to be exceeded (small_piece).
//
// This procedure is more robust than either kd_split or midpt_split,
// but is more complicated as well. When point distribution is
// extremely skewed, this degenerates to midpt_split (actually
// 1/3 point split), and when the points are most evenly distributed,
// this degenerates to kd-split.
//----------------------------------------------------------------------
void fair_split(
ANNpointArray pa, // point array
ANNidxArray pidx, // point indices (permuted on return)
const ANNorthRect &bnds, // bounding rectangle for cell
int n, // number of points
int dim, // dimension of space
int &cut_dim, // cutting dimension (returned)
ANNcoord &cut_val, // cutting value (returned)
int &n_lo) // num of points on low side (returned)
{
int d;
ANNcoord max_length = bnds.hi[0] - bnds.lo[0];
cut_dim = 0;
for (d = 1; d < dim; d++) { // find length of longest box side
ANNcoord length = bnds.hi[d] - bnds.lo[d];
if (length > max_length) {
max_length = length;
cut_dim = d;
}
}
ANNcoord max_spread = 0; // find legal cut with max spread
cut_dim = 0;
for (d = 0; d < dim; d++) {
ANNcoord length = bnds.hi[d] - bnds.lo[d];
// is this side midpoint splitable
// without violating aspect ratio?
if (((double) max_length)*2.0/((double) length) <= FS_ASPECT_RATIO) {
// compute spread along this dim
ANNcoord spr = annSpread(pa, pidx, n, d);
if (spr > max_spread) { // best spread so far
max_spread = spr;
cut_dim = d; // this is dimension to cut
}
}
}
max_length = 0; // find longest side other than cut_dim
for (d = 0; d < dim; d++) {
ANNcoord length = bnds.hi[d] - bnds.lo[d];
if (d != cut_dim && length > max_length)
max_length = length;
}
// consider most extreme splits
ANNcoord small_piece = max_length / FS_ASPECT_RATIO;
ANNcoord lo_cut = bnds.lo[cut_dim] + small_piece;// lowest legal cut
ANNcoord hi_cut = bnds.hi[cut_dim] - small_piece;// highest legal cut
int br1, br2;
// is median below lo_cut ?
if (annSplitBalance(pa, pidx, n, cut_dim, lo_cut) >= 0) {
cut_val = lo_cut; // cut at lo_cut
annPlaneSplit(pa, pidx, n, cut_dim, cut_val, br1, br2);
n_lo = br1;
}
// is median above hi_cut?
else if (annSplitBalance(pa, pidx, n, cut_dim, hi_cut) <= 0) {
cut_val = hi_cut; // cut at hi_cut
annPlaneSplit(pa, pidx, n, cut_dim, cut_val, br1, br2);
n_lo = br2;
}
else { // median cut preserves asp ratio
n_lo = n/2; // split about median
annMedianSplit(pa, pidx, n, cut_dim, cut_val, n_lo);
}
}
//----------------------------------------------------------------------
// sl_fair_split - sliding fair split splitting rule
//
// Sliding fair split is a splitting rule that combines the
// strengths of both fair split with sliding midpoint split.
// Fair split tends to produce balanced splits when the points
// are roughly uniformly distributed, but it can produce many
// trivial splits when points are highly clustered. Sliding
// midpoint never produces trivial splits, and shrinks boxes
// nicely if points are highly clustered, but it may produce
// rather unbalanced splits when points are unclustered but not
// quite uniform.
//
// Sliding fair split is based on the theory that there are two
// types of splits that are "good": balanced splits that produce
// fat boxes, and unbalanced splits provided the cell with fewer
// points is fat.
//
// This splitting rule operates by first computing the longest
// side of the current bounding box. Then it asks which sides
// could be split (at the midpoint) and still satisfy the aspect
// ratio bound with respect to this side. Among these, it selects
// the side with the largest spread (as fair split would). It
// then considers the most extreme cuts that would be allowed by
// the aspect ratio bound. This is done by dividing the longest
// side of the box by the aspect ratio bound. If the median cut
// lies between these extreme cuts, then we use the median cut.
// If not, then consider the extreme cut that is closer to the
// median. If all the points lie to one side of this cut, then
// we slide the cut until it hits the first point. This may
// violate the aspect ratio bound, but will never generate empty
// cells. However the sibling of every such skinny cell is fat,
// and hence packing arguments still apply.
//
//----------------------------------------------------------------------
void sl_fair_split(
ANNpointArray pa, // point array
ANNidxArray pidx, // point indices (permuted on return)
const ANNorthRect &bnds, // bounding rectangle for cell
int n, // number of points
int dim, // dimension of space
int &cut_dim, // cutting dimension (returned)
ANNcoord &cut_val, // cutting value (returned)
int &n_lo) // num of points on low side (returned)
{
int d;
ANNcoord min, max; // min/max coordinates
int br1, br2; // split break points
ANNcoord max_length = bnds.hi[0] - bnds.lo[0];
cut_dim = 0;
for (d = 1; d < dim; d++) { // find length of longest box side
ANNcoord length = bnds.hi[d] - bnds.lo[d];
if (length > max_length) {
max_length = length;
cut_dim = d;
}
}
ANNcoord max_spread = 0; // find legal cut with max spread
cut_dim = 0;
for (d = 0; d < dim; d++) {
ANNcoord length = bnds.hi[d] - bnds.lo[d];
// is this side midpoint splitable
// without violating aspect ratio?
if (((double) max_length)*2.0/((double) length) <= FS_ASPECT_RATIO) {
// compute spread along this dim
ANNcoord spr = annSpread(pa, pidx, n, d);
if (spr > max_spread) { // best spread so far
max_spread = spr;
cut_dim = d; // this is dimension to cut
}
}
}
max_length = 0; // find longest side other than cut_dim
for (d = 0; d < dim; d++) {
ANNcoord length = bnds.hi[d] - bnds.lo[d];
if (d != cut_dim && length > max_length)
max_length = length;
}
// consider most extreme splits
ANNcoord small_piece = max_length / FS_ASPECT_RATIO;
ANNcoord lo_cut = bnds.lo[cut_dim] + small_piece;// lowest legal cut
ANNcoord hi_cut = bnds.hi[cut_dim] - small_piece;// highest legal cut
// find min and max along cut_dim
annMinMax(pa, pidx, n, cut_dim, min, max);
// is median below lo_cut?
if (annSplitBalance(pa, pidx, n, cut_dim, lo_cut) >= 0) {
if (max > lo_cut) { // are any points above lo_cut?
cut_val = lo_cut; // cut at lo_cut
annPlaneSplit(pa, pidx, n, cut_dim, cut_val, br1, br2);
n_lo = br1; // balance if there are ties
}
else { // all points below lo_cut
cut_val = max; // cut at max value
annPlaneSplit(pa, pidx, n, cut_dim, cut_val, br1, br2);
n_lo = n-1;
}
}
// is median above hi_cut?
else if (annSplitBalance(pa, pidx, n, cut_dim, hi_cut) <= 0) {
if (min < hi_cut) { // are any points below hi_cut?
cut_val = hi_cut; // cut at hi_cut
annPlaneSplit(pa, pidx, n, cut_dim, cut_val, br1, br2);
n_lo = br2; // balance if there are ties
}
else { // all points above hi_cut
cut_val = min; // cut at min value
annPlaneSplit(pa, pidx, n, cut_dim, cut_val, br1, br2);
n_lo = 1;
}
}
else { // median cut is good enough
n_lo = n/2; // split about median
annMedianSplit(pa, pidx, n, cut_dim, cut_val, n_lo);
}
}
|