File: ISMBucket.h

package info (click to toggle)
casacore 3.5.0-2
  • links: PTS, VCS
  • area: main
  • in suites: bookworm
  • size: 51,680 kB
  • sloc: cpp: 462,815; fortran: 16,372; ansic: 7,403; yacc: 4,626; lex: 2,340; sh: 1,786; python: 629; perl: 531; sed: 499; csh: 34; makefile: 31
file content (336 lines) | stat: -rw-r--r-- 13,589 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
//# ISMBucket.h: A bucket in the Incremental Storage Manager
//# Copyright (C) 1996,1999,2000,2001
//# Associated Universities, Inc. Washington DC, USA.
//#
//# This library is free software; you can redistribute it and/or modify it
//# under the terms of the GNU Library General Public License as published by
//# the Free Software Foundation; either version 2 of the License, or (at your
//# option) any later version.
//#
//# This library is distributed in the hope that it will be useful, but WITHOUT
//# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
//# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
//# License for more details.
//#
//# You should have received a copy of the GNU Library General Public License
//# along with this library; if not, write to the Free Software Foundation,
//# Inc., 675 Massachusetts Ave, Cambridge, MA 02139, USA.
//#
//# Correspondence concerning AIPS++ should be addressed as follows:
//#        Internet email: aips2-request@nrao.edu.
//#        Postal address: AIPS++ Project Office
//#                        National Radio Astronomy Observatory
//#                        520 Edgemont Road
//#                        Charlottesville, VA 22903-2475 USA
//#
//# $Id$

#ifndef TABLES_ISMBUCKET_H
#define TABLES_ISMBUCKET_H

//# Includes
#include <casacore/casa/aips.h>
#include <casacore/casa/Containers/Block.h>
#include <casacore/casa/BasicSL/String.h>
#include <casacore/casa/iosfwd.h>

namespace casacore { //# NAMESPACE CASACORE - BEGIN

//# Forward declarations
class ISMBase;

// <summary>
// A bucket in the Incremental Storage Manager
// </summary>

// <use visibility=local>

// <reviewed reviewer="UNKNOWN" date="before2004/08/25" tests="">
// </reviewed>

// <prerequisite>
//# Classes you should understand before using this one.
//   <li> <linkto class=IncrementalStMan>IncrementalStMan</linkto>
//   <li> <linkto class=BucketCache>BucketCache</linkto>
// </prerequisite>

// <etymology>
// ISMBucket represents a bucket in the Incremental Storage Manager.
// </etymology>

// <synopsis>
// The Incremental Storage Manager uses a <linkto class=BucketCache>
// BucketCache</linkto> object to read/write/cache the buckets
// containing the data. An <src>ISMBucket</src> object is the
// internal representation of the contents of a bucket. <src>ISMBucket</src>
// contains static callback functions which are called by
// <src>BucketCache</src> when reading/writing a bucket. These callback
// functions do the mapping of bucket data to <src>ISMBucket</src> object
// and vice-versa.
// <p>
// A bucket contains the values of several rows
// of all columns bound to this Incremental Storage Manager.
// A bucket is split into a data part and an index part.
// Each part has an arbitrary length but together they do not exceed
// the fixed bucket length.
// <p>
// The beginning of the data part contains the values of all columns
// bound. The remainder of the data part contains the values of
// the rows/columns with a changed value.
// <br>
// The index part contains an index per column. Each index contains the
// row number and an offset for a row with a stored value. The row numbers
// are relative to the beginning of the bucket, so the bucket has
// no knowledge about the absolute row numbers. In this way deletion of
// rows is much simpler.
// <p>
// The contents of a bucket looks like:
// <srcblock>
//    -------------------------------------------------------------------
//    | index offset   | data part     | index part              | free |
//    -------------------------------------------------------------------
//     0                4               4+length(data part)
//    <--------------------------bucketsize----------------------------->
// </srcblock>
// The data part contains all data value belonging to the bucket.
// The index part contains for each column the following data:
// <srcblock>
//    -----------------------------------------------------------------------
//    | #values stored | row numbers of values | offset in data part of     |
//    | for column i   | stored for column i   | values stored for column i |
//    -----------------------------------------------------------------------
//     0                4                       4+4*nrval
// </srcblock>
// Note that the row numbers in the bucket start at 0, thus are relative
// to the beginning of the bucket. The main index kept in
// <linkto class=ISMIndex>ISMIndex</linkto> knows the starting row of
// each bucket. In this way bucket splitting and especially row removal
// is much easier.
// <p>
// The bucket can be stored in canonical or local (i.e. native) data format.
// When a bucket is read into memory, its data are read, converted, and
// stored in the ISMBucket object. When flushed, the contents are
// written. ISMBucket takes care that the values stored in its object
// do not exceed the size of the bucket. When full, the user can call
// a function to split it into a left and right bucket. When the new
// value has to be written at the end, the split merely consist of
// creating a new bucket. In any case, care is taken that a row is
// not split. Thus a row is always entirely contained in one bucket.
// <p>
// Class <linkto class=ISMColumn>ISMColumn</linkto> does the actual
// writing of data in a bucket and uses the relevant ISMBucket functions.

// <motivation>
// ISMBucket encapsulates the data of a bucket.
// </motivation>

//# <todo asof="$DATE:$">
//# A List of bugs, limitations, extensions or planned refinements.
//# </todo>


class ISMBucket
{
public:

    // Create a bucket with the given parent.
    // When <src>bucketStorage</src> is non-zero, reconstruct the
    // object from it.
    // It keeps the pointer to its parent (but does not own it).
    ISMBucket (ISMBase* parent, const char* bucketStorage);

    ~ISMBucket();

    // Get the row-interval for given column and row.
    // It sets the start and end of the interval to which the row belongs
    // and the offset of its current value.
    // It returns the index where the row number can be put in the
    // bucket index.
    uInt getInterval (uInt colnr, rownr_t rownr, rownr_t bucketNrrow,
		      rownr_t& start, rownr_t& end, uInt& offset) const;

    // Is the bucket large enough to add a value?
    Bool canAddData (uInt leng) const;

    // Add the data to the data part.
    // It updates the bucket index at the given index.
    // An exception is thrown if the bucket is too small.
    void addData (uInt colnr, rownr_t rownr, uInt index,
		  const char* data, uInt leng);

    // Is the bucket large enough to replace a value?
    Bool canReplaceData (uInt newLeng, uInt oldLeng) const;

    // Replace a data item.
    // When its length is variable (indicated by fixedLength=0), the old
    // value will be removed and the new one appended at the end.
    // An exception is thrown if the bucket is too small.
    void replaceData (uInt& offset, const char* data, uInt newLeng,
		      uInt fixedLength);

    // Get a pointer to the data for the given offset.
    const char* get (uInt offset) const;

    // Get the length of the data value.
    // It is <src>fixedLength</src> when non-zero,
    // otherwise read it from the data value.
    uInt getLength (uInt fixedLength, const char* data) const;

    // Get access to the offset of the data for given column and row.
    // It allows to change it (used for example by replaceData).
    uInt& getOffset (uInt colnr, rownr_t rownr);

    // Get access to the index information for the given column.
    // This is used by ISMColumn when putting the data.
    // <group>
    // Return the row numbers with a stored value.
    Block<rownr_t>& rowIndex (uInt colnr);
    // Return the offsets of the values stored in the data part.
    Block<uInt>& offIndex (uInt colnr);
    // Return the number of values stored.
    uInt& indexUsed (uInt colnr);
    // </group>

    // Split the bucket in the middle.
    // It returns the row number where the bucket was split and the
    // new left and right bucket. The caller is responsible for
    // deleting the newly created buckets.
    // When possible a simple split is done.
    // <br>
    // The starting values in the right bucket may be copies of the
    // values in the left bucket. The duplicated Block contains a switch
    // per column indicating if the value is copied.
    rownr_t split (ISMBucket*& left, ISMBucket*& right, Block<Bool>& duplicated,
                   rownr_t bucketStartRow, rownr_t bucketNrrow,
                   uInt colnr, rownr_t rownr, uInt lengToAdd);

    // Determine whether a simple split is possible. If so, do it.
    // This is possible if the new row is at the end of the last bucket,
    // which will often be the case.
    // <br>A simple split means adding a new bucket for the new row.
    // If the old bucket already contains values for that row, those
    // values are moved to the new bucket.
    // <br>This fuction is only called by split, which created the
    // left and right bucket.
    Bool simpleSplit (ISMBucket* left, ISMBucket* right,
		      Block<Bool>& duplicated,
		      rownr_t& splitRownr, rownr_t rownr);

    // Return the index where the bucket should be split to get
    // two parts with almost identical length.
    uInt getSplit (uInt totLeng, const Block<uInt>& rowLeng,
		   const Block<uInt>& cumLeng);

    // Remove <src>nr</src> items from data and index part by shifting
    // to the left. The <src>rowIndex</src>, <src>offIndex</src>, and
    // <src>nused</src> get updated. The caller is responsible for
    // removing data when needed (e.g. <src>ISMIndColumn</src> removes
    // the indirect arrays from its file).
    void shiftLeft (uInt index, uInt nr, Block<rownr_t>& rowIndex,
		    Block<uInt>& offIndex, uInt& nused, uInt leng);

    // Copy the contents of that bucket to this bucket.
    // This is used after a split operation.
    void copy (const ISMBucket& that);

    // Callback function when BucketCache reads a bucket.
    // It creates an ISMBucket object and converts the raw bucketStorage
    // to that object.
    // It returns the pointer to ISMBucket object which gets part of the cache.
    // The object gets deleted by the deleteCallBack function.
    static char* readCallBack (void* owner, const char* bucketStorage);

    // Callback function when BucketCache writes a bucket.
    // It converts the ISMBucket bucket object to the raw bucketStorage.
    static void writeCallBack (void* owner, char* bucketStorage,
			       const char* bucket);

    // Callback function when BucketCache adds a new bucket to the data file.
    // This function creates an empty ISMBucket object.
    // It returns the pointer to ISMBucket object which gets part of the cache.
    // The object gets deleted by the deleteCallBack function.
    static char* initCallBack (void* owner);

    // Callback function when BucketCache removes a bucket from the cache.
    // This function dletes the ISMBucket bucket object.
    static void deleteCallBack (void*, char* bucket);

    // Show the layout of the bucket.
    void show (ostream& os) const;

    // Check that there are no repeated rowIds in the bucket
    Bool check (uInt& offendingCol, uInt& offendingIndex,
                rownr_t& offendingRow, rownr_t& offendingPrevRow) const;

private:
    // Forbid copy constructor.
    ISMBucket (const ISMBucket&);

    // Forbid assignment.
    ISMBucket& operator= (const ISMBucket&);

    // Remove a data item with the given length.
    // If the length is zero, its variable length is read first.
    void removeData (uInt offset, uInt leng);

    // Insert a data value by appending it to the end.
    // It returns the offset of the data value.
    uInt insertData (const char* data, uInt leng);

    // Copy a data item from this bucket to the other bucket.
    uInt copyData (ISMBucket& other, uInt colnr, rownr_t toRownr,
		   uInt fromIndex, uInt toIndex) const;

    // Read the data from the storage into this bucket.
    void read (const char* bucketStorage);

    // Write the bucket into the storage.
    void write (char* bucketStorage) const;


    //# Declare member variables.
    // Pointer to the parent storage manager.
    ISMBase*          stmanPtr_p;
    // The size (in bytes) of an uInt and rownr_t (used in index, etc.).
    uInt              uIntSize_p;
    uInt              rownrSize_p;
    // The size (in bytes) of the data.
    uInt              dataLeng_p;
    // The size (in bytes) of the index.
    uInt              indexLeng_p;
    // The row index per column; each index contains the row number
    // of each value stored in the bucket (for that column).
    PtrBlock<Block<rownr_t>*> rowIndex_p;
    // The offset index per column; each index contains the offset (in bytes)
    // of each value stored in the bucket (for that column).
    PtrBlock<Block<uInt>*> offIndex_p;
    // Nr of used elements in each index; i.e. the number of stored values
    // per column.
    Block<uInt>       indexUsed_p;
    // The data space (in external (e.g. canonical) format).
    char*             data_p;
};


inline const char* ISMBucket::get (uInt offset) const
{
    return data_p + offset;
}
inline Block<rownr_t>& ISMBucket::rowIndex (uInt colnr)
{
    return *(rowIndex_p[colnr]);
}
inline Block<uInt>& ISMBucket::offIndex (uInt colnr)
{
    return *(offIndex_p[colnr]);
}
inline uInt& ISMBucket::indexUsed (uInt colnr)
{
    return indexUsed_p[colnr];
}



} //# NAMESPACE CASACORE - END

#endif