File: kd_dump.cpp

package info (click to toggle)
mldemos 0.5.1-3
  • links: PTS, VCS
  • area: main
  • in suites: jessie, jessie-kfreebsd
  • size: 32,224 kB
  • ctags: 46,525
  • sloc: cpp: 306,887; ansic: 167,718; ml: 126; sh: 109; makefile: 2
file content (444 lines) | stat: -rw-r--r-- 17,117 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
//----------------------------------------------------------------------
// File:			kd_dump.cc
// Programmer:		David Mount
// Description:		Dump and Load for kd- and bd-trees
// Last modified:	01/04/05 (Version 1.0)
//----------------------------------------------------------------------
// Copyright (c) 1997-2005 University of Maryland and Sunil Arya and
// David Mount.  All Rights Reserved.
// 
// This software and related documentation is part of the Approximate
// Nearest Neighbor Library (ANN).  This software is provided under
// the provisions of the Lesser GNU Public License (LGPL).  See the
// file ../ReadMe.txt for further information.
// 
// The University of Maryland (U.M.) and the authors make no
// representations about the suitability or fitness of this software for
// any purpose.  It is provided "as is" without express or implied
// warranty.
//----------------------------------------------------------------------
// History:
//	Revision 0.1  03/04/98
//		Initial release
//	Revision 1.0  04/01/05
//		Moved dump out of kd_tree.cc into this file.
//		Added kd-tree load constructor.
//----------------------------------------------------------------------
// This file contains routines for dumping kd-trees and bd-trees and
// reloading them. (It is an abuse of policy to include both kd- and
// bd-tree routines in the same file, sorry.  There should be no problem
// in deleting the bd- versions of the routines if they are not
// desired.)
//----------------------------------------------------------------------

#include "kd_tree.h"					// kd-tree declarations
#include "bd_tree.h"					// bd-tree declarations

using namespace std;					// make std:: available

//----------------------------------------------------------------------
//		Constants
//----------------------------------------------------------------------

const int		STRING_LEN		= 500;	// maximum string length
const double	EPSILON			= 1E-5; // small number for float comparison

enum ANNtreeType {KD_TREE, BD_TREE};	// tree types (used in loading)

//----------------------------------------------------------------------
//		Procedure declarations
//----------------------------------------------------------------------

static ANNkd_ptr annReadDump(			// read dump file
	istream				&in,					// input stream
	ANNtreeType			tree_type,				// type of tree expected
	ANNpointArray		&the_pts,				// new points (if applic)
	ANNidxArray			&the_pidx,				// point indices (returned)
	int					&the_dim,				// dimension (returned)
	int					&the_n_pts,				// number of points (returned)
	int					&the_bkt_size,			// bucket size (returned)
	ANNpoint			&the_bnd_box_lo,		// low bounding point
	ANNpoint			&the_bnd_box_hi);		// high bounding point

static ANNkd_ptr annReadTree(			// read tree-part of dump file
	istream				&in,					// input stream
	ANNtreeType			tree_type,				// type of tree expected
	ANNidxArray			the_pidx,				// point indices (modified)
	int					&next_idx);				// next index (modified)

//----------------------------------------------------------------------
//	ANN kd- and bd-tree Dump Format
//		The dump file begins with a header containing the version of
//		ANN, an optional section containing the points, followed by
//		a description of the tree.	The tree is printed in preorder.
//
//		Format:
//		#ANN <version number> <comments> [END_OF_LINE]
//		points <dim> <n_pts>			(point coordinates: this is optional)
//		0 <xxx> <xxx> ... <xxx>			(point indices and coordinates)
//		1 <xxx> <xxx> ... <xxx>
//		  ...
//		tree <dim> <n_pts> <bkt_size>
//		<xxx> <xxx> ... <xxx>			(lower end of bounding box)
//		<xxx> <xxx> ... <xxx>			(upper end of bounding box)
//				If the tree is null, then a single line "null" is
//				output.	 Otherwise the nodes of the tree are printed
//				one per line in preorder.  Leaves and splitting nodes 
//				have the following formats:
//		Leaf node:
//				leaf <n_pts> <bkt[0]> <bkt[1]> ... <bkt[n-1]>
//		Splitting nodes:
//				split <cut_dim> <cut_val> <lo_bound> <hi_bound>
//
//		For bd-trees:
//
//		Shrinking nodes:
//				shrink <n_bnds>
//						<cut_dim> <cut_val> <side>
//						<cut_dim> <cut_val> <side>
//						... (repeated n_bnds times)
//----------------------------------------------------------------------

void ANNkd_tree::Dump(					// dump entire tree
		ANNbool with_pts,				// print points as well?
		ostream &out)					// output stream
{
	out << "#ANN " << ANNversion << "\n";
	out.precision(ANNcoordPrec);		// use full precision in dumping
	if (with_pts) {						// print point coordinates
		out << "points " << dim << " " << n_pts << "\n";
		for (int i = 0; i < n_pts; i++) {
			out << i << " ";
			annPrintPt(pts[i], dim, out);
			out << "\n";
		}
	}
	out << "tree "						// print tree elements
		<< dim << " "
		<< n_pts << " "
		<< bkt_size << "\n";

	annPrintPt(bnd_box_lo, dim, out);	// print lower bound
	out << "\n";
	annPrintPt(bnd_box_hi, dim, out);	// print upper bound
	out << "\n";

	if (root == NULL)					// empty tree?
		out << "null\n";
	else {
		root->dump(out);				// invoke printing at root
	}
	out.precision(0);					// restore default precision
}

void ANNkd_split::dump(					// dump a splitting node
		ostream &out)					// output stream
{
	out << "split " << cut_dim << " " << cut_val << " ";
	out << cd_bnds[ANN_LO] << " " << cd_bnds[ANN_HI] << "\n";

	child[ANN_LO]->dump(out);			// print low child
	child[ANN_HI]->dump(out);			// print high child
}

void ANNkd_leaf::dump(					// dump a leaf node
		ostream &out)					// output stream
{
	if (this == KD_TRIVIAL) {			// canonical trivial leaf node
		out << "leaf 0\n";				// leaf no points
	}
	else{
		out << "leaf " << n_pts;
		for (int j = 0; j < n_pts; j++) {
			out << " " << bkt[j];
		}
		out << "\n";
	}
}

void ANNbd_shrink::dump(				// dump a shrinking node
		ostream &out)					// output stream
{
	out << "shrink " << n_bnds << "\n";
	for (int j = 0; j < n_bnds; j++) {
		out << bnds[j].cd << " " << bnds[j].cv << " " << bnds[j].sd << "\n";
	}
	child[ANN_IN]->dump(out);			// print in-child
	child[ANN_OUT]->dump(out);			// print out-child
}

//----------------------------------------------------------------------
// Load kd-tree from dump file
//		This rebuilds a kd-tree which was dumped to a file.	 The dump
//		file contains all the basic tree information according to a
//		preorder traversal.	 We assume that the dump file also contains
//		point data.	 (This is to guarantee the consistency of the tree.)
//		If not, then an error is generated.
//
//		Indirectly, this procedure allocates space for points, point
//		indices, all nodes in the tree, and the bounding box for the
//		tree.  When the tree is destroyed, all but the points are
//		deallocated.
//
//		This routine calls annReadDump to do all the work.
//----------------------------------------------------------------------

ANNkd_tree::ANNkd_tree(					// build from dump file
	istream				&in)					// input stream for dump file
{
	int the_dim;								// local dimension
	int the_n_pts;								// local number of points
	int the_bkt_size;							// local number of points
	ANNpoint the_bnd_box_lo;					// low bounding point
	ANNpoint the_bnd_box_hi;					// high bounding point
	ANNpointArray the_pts;						// point storage
	ANNidxArray the_pidx;						// point index storage
	ANNkd_ptr the_root;							// root of the tree

	the_root = annReadDump(						// read the dump file
		in,										// input stream
		KD_TREE,								// expecting a kd-tree
		the_pts,								// point array (returned)
		the_pidx,								// point indices (returned)
		the_dim, the_n_pts, the_bkt_size,		// basic tree info (returned)
		the_bnd_box_lo, the_bnd_box_hi);		// bounding box info (returned)

												// create a skeletal tree
	SkeletonTree(the_n_pts, the_dim, the_bkt_size, the_pts, the_pidx);

	bnd_box_lo = the_bnd_box_lo;
	bnd_box_hi = the_bnd_box_hi;

	root = the_root;							// set the root
}

ANNbd_tree::ANNbd_tree(					// build bd-tree from dump file
	istream				&in) : ANNkd_tree()		// input stream for dump file
{
	int the_dim;								// local dimension
	int the_n_pts;								// local number of points
	int the_bkt_size;							// local number of points
	ANNpoint the_bnd_box_lo;					// low bounding point
	ANNpoint the_bnd_box_hi;					// high bounding point
	ANNpointArray the_pts;						// point storage
	ANNidxArray the_pidx;						// point index storage
	ANNkd_ptr the_root;							// root of the tree

	the_root = annReadDump(						// read the dump file
		in,										// input stream
		BD_TREE,								// expecting a bd-tree
		the_pts,								// point array (returned)
		the_pidx,								// point indices (returned)
		the_dim, the_n_pts, the_bkt_size,		// basic tree info (returned)
		the_bnd_box_lo, the_bnd_box_hi);		// bounding box info (returned)

												// create a skeletal tree
	SkeletonTree(the_n_pts, the_dim, the_bkt_size, the_pts, the_pidx);
	bnd_box_lo = the_bnd_box_lo;
	bnd_box_hi = the_bnd_box_hi;

	root = the_root;							// set the root
}

//----------------------------------------------------------------------
//	annReadDump - read a dump file
//
//		This procedure reads a dump file, constructs a kd-tree
//		and returns all the essential information needed to actually
//		construct the tree.	 Because this procedure is used for
//		constructing both kd-trees and bd-trees, the second argument
//		is used to indicate which type of tree we are expecting.
//----------------------------------------------------------------------

static ANNkd_ptr annReadDump(
	istream				&in,					// input stream
	ANNtreeType			tree_type,				// type of tree expected
	ANNpointArray		&the_pts,				// new points (returned)
	ANNidxArray			&the_pidx,				// point indices (returned)
	int					&the_dim,				// dimension (returned)
	int					&the_n_pts,				// number of points (returned)
	int					&the_bkt_size,			// bucket size (returned)
	ANNpoint			&the_bnd_box_lo,		// low bounding point (ret'd)
	ANNpoint			&the_bnd_box_hi)		// high bounding point (ret'd)
{
	int j;
	char str[STRING_LEN];						// storage for string
	char version[STRING_LEN];					// ANN version number
	ANNkd_ptr the_root = NULL;

	//------------------------------------------------------------------
	//	Input file header
	//------------------------------------------------------------------
	in >> str;									// input header
	if (strcmp(str, "#ANN") != 0) {				// incorrect header
		annError("Incorrect header for dump file", ANNabort);
	}
	in.getline(version, STRING_LEN);			// get version (ignore)

	//------------------------------------------------------------------
	//	Input the points
	//			An array the_pts is allocated and points are read from
	//			the dump file.
	//------------------------------------------------------------------
	in >> str;									// get major heading
	if (strcmp(str, "points") == 0) {			// points section
		in >> the_dim;							// input dimension
		in >> the_n_pts;						// number of points
												// allocate point storage
		the_pts = annAllocPts(the_n_pts, the_dim);
		for (int i = 0; i < the_n_pts; i++) {	// input point coordinates
			ANNidx idx;							// point index
			in >> idx;							// input point index
			if (idx < 0 || idx >= the_n_pts) {
				annError("Point index is out of range", ANNabort);
			}
			for (j = 0; j < the_dim; j++) {
				in >> the_pts[idx][j];			// read point coordinates
			}
		}
		in >> str;								// get next major heading
	}
	else {										// no points were input
		annError("Points must be supplied in the dump file", ANNabort);
	}

	//------------------------------------------------------------------
	//	Input the tree
	//			After the basic header information, we invoke annReadTree
	//			to do all the heavy work.  We create our own array of
	//			point indices (so we can pass them to annReadTree())
	//			but we do not deallocate them.	They will be deallocated
	//			when the tree is destroyed.
	//------------------------------------------------------------------
	if (strcmp(str, "tree") == 0) {				// tree section
		in >> the_dim;							// read dimension
		in >> the_n_pts;						// number of points
		in >> the_bkt_size;						// bucket size
		the_bnd_box_lo = annAllocPt(the_dim);	// allocate bounding box pts
		the_bnd_box_hi = annAllocPt(the_dim);

		for (j = 0; j < the_dim; j++) {			// read bounding box low
			in >> the_bnd_box_lo[j];
		}
		for (j = 0; j < the_dim; j++) {			// read bounding box low
			in >> the_bnd_box_hi[j];
		}
		the_pidx = new ANNidx[the_n_pts];		// allocate point index array
		int next_idx = 0;						// number of indices filled
												// read the tree and indices
		the_root = annReadTree(in, tree_type, the_pidx, next_idx);
		if (next_idx != the_n_pts) {			// didn't see all the points?
			annError("Didn't see as many points as expected", ANNwarn);
		}
	}
	else {
		annError("Illegal dump format.	Expecting section heading", ANNabort);
	}
	return the_root;
}

//----------------------------------------------------------------------
// annReadTree - input tree and return pointer
//
//		annReadTree reads in a node of the tree, makes any recursive
//		calls as needed to input the children of this node (if internal).
//		It returns a pointer to the node that was created.	An array
//		of point indices is given along with a pointer to the next
//		available location in the array.  As leaves are read, their
//		point indices are stored here, and the point buckets point
//		to the first entry in the array.
//
//		Recall that these are the formats.	The tree is given in
//		preorder.
//
//		Leaf node:
//				leaf <n_pts> <bkt[0]> <bkt[1]> ... <bkt[n-1]>
//		Splitting nodes:
//				split <cut_dim> <cut_val> <lo_bound> <hi_bound>
//
//		For bd-trees:
//
//		Shrinking nodes:
//				shrink <n_bnds>
//						<cut_dim> <cut_val> <side>
//						<cut_dim> <cut_val> <side>
//						... (repeated n_bnds times)
//----------------------------------------------------------------------

static ANNkd_ptr annReadTree(
	istream				&in,					// input stream
	ANNtreeType			tree_type,				// type of tree expected
	ANNidxArray			the_pidx,				// point indices (modified)
	int					&next_idx)				// next index (modified)
{
	char tag[STRING_LEN];						// tag (leaf, split, shrink)
	int n_pts;									// number of points in leaf
	int cd;										// cut dimension
	ANNcoord cv;								// cut value
	ANNcoord lb;								// low bound
	ANNcoord hb;								// high bound
	int n_bnds;									// number of bounding sides
	int sd;										// which side

	in >> tag;									// input node tag

	if (strcmp(tag, "null") == 0) {				// null tree
		return NULL;
	}
	//------------------------------------------------------------------
	//	Read a leaf
	//------------------------------------------------------------------
	if (strcmp(tag, "leaf") == 0) {				// leaf node

		in >> n_pts;							// input number of points
		int old_idx = next_idx;					// save next_idx
		if (n_pts == 0) {						// trivial leaf
			return KD_TRIVIAL;
		}
		else {
			for (int i = 0; i < n_pts; i++) {	// input point indices
				in >> the_pidx[next_idx++];		// store in array of indices
			}
		}
		return new ANNkd_leaf(n_pts, &the_pidx[old_idx]);
	}
	//------------------------------------------------------------------
	//	Read a splitting node
	//------------------------------------------------------------------
	else if (strcmp(tag, "split") == 0) {		// splitting node

		in >> cd >> cv >> lb >> hb;

												// read low and high subtrees
		ANNkd_ptr lc = annReadTree(in, tree_type, the_pidx, next_idx);
		ANNkd_ptr hc = annReadTree(in, tree_type, the_pidx, next_idx);
												// create new node and return
		return new ANNkd_split(cd, cv, lb, hb, lc, hc);
	}
	//------------------------------------------------------------------
	//	Read a shrinking node (bd-tree only)
	//------------------------------------------------------------------
	else if (strcmp(tag, "shrink") == 0) {		// shrinking node
		if (tree_type != BD_TREE) {
			annError("Shrinking node not allowed in kd-tree", ANNabort);
		}

		in >> n_bnds;							// number of bounding sides
												// allocate bounds array
		ANNorthHSArray bds = new ANNorthHalfSpace[n_bnds];
		for (int i = 0; i < n_bnds; i++) {
			in >> cd >> cv >> sd;				// input bounding halfspace
												// copy to array
			bds[i] = ANNorthHalfSpace(cd, cv, sd);
		}
												// read inner and outer subtrees
		ANNkd_ptr ic = annReadTree(in, tree_type, the_pidx, next_idx);
		ANNkd_ptr oc = annReadTree(in, tree_type, the_pidx, next_idx);
												// create new node and return
		return new ANNbd_shrink(n_bnds, bds, ic, oc);
	}
	else {
		annError("Illegal node type in dump file", ANNabort);
		exit(0);								// to keep the compiler happy
	}
}