1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407
|
//----------------------------------------------------------------------
// File: kd_tree.cpp
// Programmer: Sunil Arya and David Mount
// Description: Basic methods for kd-trees.
// Last modified: 01/04/05 (Version 1.0)
//----------------------------------------------------------------------
// Copyright (c) 1997-2005 University of Maryland and Sunil Arya and
// David Mount. All Rights Reserved.
//
// This software and related documentation is part of the Approximate
// Nearest Neighbor Library (ANN). This software is provided under
// the provisions of the Lesser GNU Public License (LGPL). See the
// file ../ReadMe.txt for further information.
//
// The University of Maryland (U.M.) and the authors make no
// representations about the suitability or fitness of this software for
// any purpose. It is provided "as is" without express or implied
// warranty.
//----------------------------------------------------------------------
// History:
// Revision 0.1 03/04/98
// Initial release
// Revision 1.0 04/01/05
// Increased aspect ratio bound (ANN_AR_TOOBIG) from 100 to 1000.
// Fixed leaf counts to count trivial leaves.
// Added optional pa, pi arguments to Skeleton kd_tree constructor
// for use in load constructor.
// Added annClose() to eliminate KD_TRIVIAL memory leak.
//----------------------------------------------------------------------
#include "kd_tree.h" // kd-tree declarations
#include "kd_split.h" // kd-tree splitting rules
#include "kd_util.h" // kd-tree utilities
#include <ANN/ANNperf.h> // performance evaluation
//----------------------------------------------------------------------
// Global data
//
// For some splitting rules, especially with small bucket sizes,
// it is possible to generate a large number of empty leaf nodes.
// To save storage we allocate a single trivial leaf node which
// contains no points. For messy coding reasons it is convenient
// to have it reference a trivial point index.
//
// KD_TRIVIAL is allocated when the first kd-tree is created. It
// must *never* deallocated (since it may be shared by more than
// one tree).
//----------------------------------------------------------------------
static int IDX_TRIVIAL[] = {0}; // trivial point index
ANNkd_leaf *KD_TRIVIAL = NULL; // trivial leaf node
//----------------------------------------------------------------------
// Printing the kd-tree
// These routines print a kd-tree in reverse inorder (high then
// root then low). (This is so that if you look at the output
// from the right side it appear from left to right in standard
// inorder.) When outputting leaves we output only the point
// indices rather than the point coordinates. There is an option
// to print the point coordinates separately.
//
// The tree printing routine calls the printing routines on the
// individual nodes of the tree, passing in the level or depth
// in the tree. The level in the tree is used to print indentation
// for readability.
//----------------------------------------------------------------------
void ANNkd_split::print( // print splitting node
int level, // depth of node in tree
ostream &out) // output stream
{
child[ANN_HI]->print(level+1, out); // print high child
out << " ";
for (int i = 0; i < level; i++) // print indentation
out << "..";
out << "Split cd=" << cut_dim << " cv=" << cut_val;
out << " lbnd=" << cd_bnds[ANN_LO];
out << " hbnd=" << cd_bnds[ANN_HI];
out << "\n";
child[ANN_LO]->print(level+1, out); // print low child
}
void ANNkd_leaf::print( // print leaf node
int level, // depth of node in tree
ostream &out) // output stream
{
out << " ";
for (int i = 0; i < level; i++) // print indentation
out << "..";
if (this == KD_TRIVIAL) { // canonical trivial leaf node
out << "Leaf (trivial)\n";
}
else{
out << "Leaf n=" << n_pts << " <";
for (int j = 0; j < n_pts; j++) {
out << bkt[j];
if (j < n_pts-1) out << ",";
}
out << ">\n";
}
}
void ANNkd_tree::Print( // print entire tree
ANNbool with_pts, // print points as well?
ostream &out) // output stream
{
out << "ANN Version " << ANNversion << "\n";
if (with_pts) { // print point coordinates
out << " Points:\n";
for (int i = 0; i < n_pts; i++) {
out << "\t" << i << ": ";
annPrintPt(pts[i], dim, out);
out << "\n";
}
}
if (root == NULL) // empty tree?
out << " Null tree.\n";
else {
root->print(0, out); // invoke printing at root
}
}
//----------------------------------------------------------------------
// kd_tree statistics (for performance evaluation)
// This routine compute various statistics information for
// a kd-tree. It is used by the implementors for performance
// evaluation of the data structure.
//----------------------------------------------------------------------
#define MAX(a,b) ((a) > (b) ? (a) : (b))
void ANNkdStats::merge(const ANNkdStats &st) // merge stats from child
{
n_lf += st.n_lf; n_tl += st.n_tl;
n_spl += st.n_spl; n_shr += st.n_shr;
depth = MAX(depth, st.depth);
sum_ar += st.sum_ar;
}
//----------------------------------------------------------------------
// Update statistics for nodes
//----------------------------------------------------------------------
const double ANN_AR_TOOBIG = 1000; // too big an aspect ratio
void ANNkd_leaf::getStats( // get subtree statistics
int dim, // dimension of space
ANNkdStats &st, // stats (modified)
ANNorthRect &bnd_box) // bounding box
{
st.reset();
st.n_lf = 1; // count this leaf
if (this == KD_TRIVIAL) st.n_tl = 1; // count trivial leaf
double ar = annAspectRatio(dim, bnd_box); // aspect ratio of leaf
// incr sum (ignore outliers)
st.sum_ar += float(ar < ANN_AR_TOOBIG ? ar : ANN_AR_TOOBIG);
}
void ANNkd_split::getStats( // get subtree statistics
int dim, // dimension of space
ANNkdStats &st, // stats (modified)
ANNorthRect &bnd_box) // bounding box
{
ANNkdStats ch_stats; // stats for children
// get stats for low child
ANNcoord hv = bnd_box.hi[cut_dim]; // save box bounds
bnd_box.hi[cut_dim] = cut_val; // upper bound for low child
ch_stats.reset(); // reset
child[ANN_LO]->getStats(dim, ch_stats, bnd_box);
st.merge(ch_stats); // merge them
bnd_box.hi[cut_dim] = hv; // restore bound
// get stats for high child
ANNcoord lv = bnd_box.lo[cut_dim]; // save box bounds
bnd_box.lo[cut_dim] = cut_val; // lower bound for high child
ch_stats.reset(); // reset
child[ANN_HI]->getStats(dim, ch_stats, bnd_box);
st.merge(ch_stats); // merge them
bnd_box.lo[cut_dim] = lv; // restore bound
st.depth++; // increment depth
st.n_spl++; // increment number of splits
}
//----------------------------------------------------------------------
// getStats
// Collects a number of statistics related to kd_tree or
// bd_tree.
//----------------------------------------------------------------------
void ANNkd_tree::getStats( // get tree statistics
ANNkdStats &st) // stats (modified)
{
st.reset(dim, n_pts, bkt_size); // reset stats
// create bounding box
if (pts != NULL) {
ANNorthRect bnd_box(dim, bnd_box_lo, bnd_box_hi);
if (root != NULL) { // if nonempty tree
root->getStats(dim, st, bnd_box); // get statistics
st.avg_ar = st.sum_ar / st.n_lf; // average leaf asp ratio
}
}
}
//----------------------------------------------------------------------
// kd_tree destructor
// The destructor just frees the various elements that were
// allocated in the construction process.
//----------------------------------------------------------------------
ANNkd_tree::~ANNkd_tree() // tree destructor
{
if (root != NULL) delete root;
if (pidx != NULL) delete [] pidx;
if (bnd_box_lo != NULL) annDeallocPt(bnd_box_lo);
if (bnd_box_hi != NULL) annDeallocPt(bnd_box_hi);
}
//----------------------------------------------------------------------
// This is called with all use of ANN is finished. It eliminates the
// minor memory leak caused by the allocation of KD_TRIVIAL.
//----------------------------------------------------------------------
void annClose() // close use of ANN
{
if (KD_TRIVIAL != NULL) {
delete KD_TRIVIAL;
KD_TRIVIAL = NULL;
}
}
//----------------------------------------------------------------------
// kd_tree constructors
// There is a skeleton kd-tree constructor which sets up a
// trivial empty tree. The last optional argument allows
// the routine to be passed a point index array which is
// assumed to be of the proper size (n). Otherwise, one is
// allocated and initialized to the identity. Warning: In
// either case the destructor will deallocate this array.
//
// As a kludge, we need to allocate KD_TRIVIAL if one has not
// already been allocated. (This is because I'm too dumb to
// figure out how to cause a pointer to be allocated at load
// time.)
//----------------------------------------------------------------------
void ANNkd_tree::SkeletonTree( // construct skeleton tree
int n, // number of points
int dd, // dimension
int bs, // bucket size
ANNpointArray pa, // point array
ANNidxArray pi) // point indices
{
dim = dd; // initialize basic elements
n_pts = n;
bkt_size = bs;
pts = pa; // initialize points array
root = NULL; // no associated tree yet
if (pi == NULL) { // point indices provided?
pidx = new ANNidx[n]; // no, allocate space for point indices
for (int i = 0; i < n; i++) {
pidx[i] = i; // initially identity
}
}
else {
pidx = pi; // yes, use them
}
bnd_box_lo = bnd_box_hi = NULL; // bounding box is nonexistent
if (KD_TRIVIAL == NULL) // no trivial leaf node yet?
KD_TRIVIAL = new ANNkd_leaf(0, IDX_TRIVIAL); // allocate it
}
ANNkd_tree::ANNkd_tree( // basic constructor
int n, // number of points
int dd, // dimension
int bs) // bucket size
{ SkeletonTree(n, dd, bs); } // construct skeleton tree
//----------------------------------------------------------------------
// rkd_tree - recursive procedure to build a kd-tree
//
// Builds a kd-tree for points in pa as indexed through the
// array pidx[0..n-1] (typically a subarray of the array used in
// the top-level call). This routine permutes the array pidx,
// but does not alter pa[].
//
// The construction is based on a standard algorithm for constructing
// the kd-tree (see Friedman, Bentley, and Finkel, ``An algorithm for
// finding best matches in logarithmic expected time,'' ACM Transactions
// on Mathematical Software, 3(3):209-226, 1977). The procedure
// operates by a simple divide-and-conquer strategy, which determines
// an appropriate orthogonal cutting plane (see below), and splits
// the points. When the number of points falls below the bucket size,
// we simply store the points in a leaf node's bucket.
//
// One of the arguments is a pointer to a splitting routine,
// whose prototype is:
//
// void split(
// ANNpointArray pa, // complete point array
// ANNidxArray pidx, // point array (permuted on return)
// ANNorthRect &bnds, // bounds of current cell
// int n, // number of points
// int dim, // dimension of space
// int &cut_dim, // cutting dimension
// ANNcoord &cut_val, // cutting value
// int &n_lo) // no. of points on low side of cut
//
// This procedure selects a cutting dimension and cutting value,
// partitions pa about these values, and returns the number of
// points on the low side of the cut.
//----------------------------------------------------------------------
ANNkd_ptr rkd_tree( // recursive construction of kd-tree
ANNpointArray pa, // point array
ANNidxArray pidx, // point indices to store in subtree
int n, // number of points
int dim, // dimension of space
int bsp, // bucket space
ANNorthRect &bnd_box, // bounding box for current node
ANNkd_splitter splitter) // splitting routine
{
if (n <= bsp) { // n small, make a leaf node
if (n == 0) // empty leaf node
return KD_TRIVIAL; // return (canonical) empty leaf
else // construct the node and return
return new ANNkd_leaf(n, pidx);
}
else { // n large, make a splitting node
int cd; // cutting dimension
ANNcoord cv; // cutting value
int n_lo; // number on low side of cut
ANNkd_node *lo, *hi; // low and high children
// invoke splitting procedure
(*splitter)(pa, pidx, bnd_box, n, dim, cd, cv, n_lo);
ANNcoord lv = bnd_box.lo[cd]; // save bounds for cutting dimension
ANNcoord hv = bnd_box.hi[cd];
bnd_box.hi[cd] = cv; // modify bounds for left subtree
lo = rkd_tree( // build left subtree
pa, pidx, n_lo, // ...from pidx[0..n_lo-1]
dim, bsp, bnd_box, splitter);
bnd_box.hi[cd] = hv; // restore bounds
bnd_box.lo[cd] = cv; // modify bounds for right subtree
hi = rkd_tree( // build right subtree
pa, pidx + n_lo, n-n_lo,// ...from pidx[n_lo..n-1]
dim, bsp, bnd_box, splitter);
bnd_box.lo[cd] = lv; // restore bounds
// create the splitting node
ANNkd_split *ptr = new ANNkd_split(cd, cv, lv, hv, lo, hi);
return ptr; // return pointer to this node
}
}
//----------------------------------------------------------------------
// kd-tree constructor
// This is the main constructor for kd-trees given a set of points.
// It first builds a skeleton tree, then computes the bounding box
// of the data points, and then invokes rkd_tree() to actually
// build the tree, passing it the appropriate splitting routine.
//----------------------------------------------------------------------
ANNkd_tree::ANNkd_tree( // construct from point array
ANNpointArray pa, // point array (with at least n pts)
int n, // number of points
int dd, // dimension
int bs, // bucket size
ANNsplitRule split) // splitting method
{
SkeletonTree(n, dd, bs); // set up the basic stuff
pts = pa; // where the points are
if (n == 0) return; // no points--no sweat
ANNorthRect bnd_box(dd); // bounding box for points
annEnclRect(pa, pidx, n, dd, bnd_box);// construct bounding rectangle
// copy to tree structure
bnd_box_lo = annCopyPt(dd, bnd_box.lo);
bnd_box_hi = annCopyPt(dd, bnd_box.hi);
switch (split) { // build by rule
case ANN_KD_STD: // standard kd-splitting rule
root = rkd_tree(pa, pidx, n, dd, bs, bnd_box, kd_split);
break;
case ANN_KD_MIDPT: // midpoint split
root = rkd_tree(pa, pidx, n, dd, bs, bnd_box, midpt_split);
break;
case ANN_KD_FAIR: // fair split
root = rkd_tree(pa, pidx, n, dd, bs, bnd_box, fair_split);
break;
case ANN_KD_SUGGEST: // best (in our opinion)
case ANN_KD_SL_MIDPT: // sliding midpoint split
root = rkd_tree(pa, pidx, n, dd, bs, bnd_box, sl_midpt_split);
break;
case ANN_KD_SL_FAIR: // sliding fair split
root = rkd_tree(pa, pidx, n, dd, bs, bnd_box, sl_fair_split);
break;
default:
annError("Illegal splitting method", ANNabort);
}
}
|