File: ClassicalQuantileComputer.h

package info (click to toggle)
casacore 3.8.0-3
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 51,912 kB
  • sloc: cpp: 471,569; fortran: 16,372; ansic: 7,416; yacc: 4,714; lex: 2,346; sh: 1,865; python: 629; perl: 531; sed: 499; csh: 201; makefile: 32
file content (490 lines) | stat: -rw-r--r-- 20,279 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
//# Copyright (C) 2000,2001
//# Associated Universities, Inc. Washington DC, USA.
//#
//# This library is free software; you can redistribute it and/or modify it
//# under the terms of the GNU Library General Public License as published by
//# the Free Software Foundation; either version 2 of the License, or (at your
//# option) any later version.
//#
//# This library is distributed in the hope that it will be useful, but WITHOUT
//# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
//# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
//# License for more details.
//#
//# You should have received a copy of the GNU Library General Public License
//# along with this library; if not, write to the Free Software Foundation,
//# Inc., 675 Massachusetts Ave, Cambridge, MA 02139, USA.
//#
//# Correspondence concerning AIPS++ should be addressed as follows:
//#        Internet email: casa-feedback@nrao.edu.
//#        Postal address: AIPS++ Project Office
//#                        National Radio Astronomy Observatory
//#                        520 Edgemont Road
//#                        Charlottesville, VA 22903-2475 USA
//#

#ifndef SCIMATH_CLASSICALQUANTILECOMPUTER_H
#define SCIMATH_CLASSICALQUANTILECOMPUTER_H

#include <casacore/scimath/StatsFramework/StatisticsAlgorithmQuantileComputer.h>

#include <casacore/scimath/StatsFramework/StatisticsUtilities.h>

#include <casacore/casa/aips.h>

#include <map>
#include <memory>
#include <set>
#include <utility>
#include <vector>

namespace casacore {

// This class is used internally by ClassicalStatistics objects. It should never
// be explicitly instantiated by an API developer. See the documentation of
// StatisticsAlgorithm for details regarding QuantileComputer classes.

// Template parameters:
// <ul>
// <li> AccumType: type of the values held in the arrays and maps that the
//      methods of this class fill and return.
// <li> DataIterator: iterator type used to walk the data values.
// <li> MaskIterator: iterator type used to walk the mask; defaults to
//      <src>const Bool*</src>.
// <li> WeightsIterator: iterator type used to walk the weights; defaults to
//      DataIterator.
// </ul>
// NOTE(review): CASA_STATP is a macro defined outside this header; presumably
// it expands to the four template parameters above -- confirm.
template <
    class AccumType, class DataIterator, class MaskIterator=const Bool*,
    class WeightsIterator=DataIterator
> class ClassicalQuantileComputer
    : public StatisticsAlgorithmQuantileComputer<CASA_STATP> {

    // Shorthand type names used by the declarations in this class.
    using LimitPair = std::pair<AccumType, AccumType>;
    using LimitPairVectorIter = typename std::vector<LimitPair>::const_iterator;
    // maps an index to the value found at that index
    using IndexValueMap = typename std::map<uInt64, AccumType>;
    using IndexSet = std::set<uInt64>;

public:

    // Default construction is disabled; an object must be constructed from a
    // dataset pointer or copied from another object.
    ClassicalQuantileComputer() = delete;

    // Construct from a pointer to the dataset.
    // NOTE(review): ownership and required lifetime of <src>dataset</src> are
    // not visible in this header; confirm against the base class.
    ClassicalQuantileComputer(StatisticsDataset<CASA_STATP>* dataset);

    // copy semantics
    ClassicalQuantileComputer(const ClassicalQuantileComputer& other);

    virtual ~ClassicalQuantileComputer();

    // copy semantics
    ClassicalQuantileComputer& operator=(
        const ClassicalQuantileComputer& other
    );

    // clone this object by returning a pointer to a copy
    // NOTE(review): a raw pointer is returned; presumably the caller takes
    // ownership of the copy -- confirm.
    virtual StatisticsAlgorithmQuantileComputer<CASA_STATP>* clone() const;

    // Get the median. Caller is responsible for passing correct values of
    // mynpts, mymin, and mymax; no checking is done for correctness in this
    // method.
    // NOTE(review): the meanings of <src>binningThreshholdSizeBytes</src>,
    // <src>persistSortedArray</src> and <src>nBins</src> are not described in
    // this header; see the documentation of StatisticsAlgorithm.
    virtual AccumType getMedian(
        uInt64 mynpts, AccumType mymin, AccumType mymax,
        uInt binningThreshholdSizeBytes, Bool persistSortedArray, uInt nBins
    );

    // get the median of the absolute deviation about the median of the data.
    // The parameters are the same as those of getMedian().
    virtual AccumType getMedianAbsDevMed(
        uInt64 mynpts, AccumType mymin, AccumType mymax,
        uInt binningThreshholdSizeBytes, Bool persistSortedArray, uInt nBins
    );

    // If one needs to compute both the median and quantile values, it is
    // better to call getMedianAndQuantiles() rather than getMedian() and
    // getQuantiles() separately, as the first will scan large data sets fewer
    // times than calling the separate methods. The return value is the median;
    // the quantiles are returned in the <src>quantiles</src> map. Values in the
    // <src>fractions</src> set represent the locations in the CDF and should be
    // between 0 and 1, exclusive.
    virtual AccumType getMedianAndQuantiles(
        std::map<Double, AccumType>& quantiles,
        const std::set<Double>& fractions, uInt64 mynpts, AccumType mymin,
        AccumType mymax, uInt binningThreshholdSizeBytes,
        Bool persistSortedArray, uInt nBins
    );

    // Get the specified quantiles. Values in <src>fractions</src> must be
    // between 0 and 1, noninclusive. The returned map has Double keys and
    // AccumType values.
    // NOTE(review): presumably each key is one of the requested fractions and
    // its value the corresponding quantile -- confirm in the implementation.
    virtual std::map<Double, AccumType> getQuantiles(
        const std::set<Double>& fractions, uInt64 mynpts, AccumType mymin,
        AccumType mymax, uInt binningThreshholdSizeBytes,
        Bool persistSortedArray, uInt nBins
    );

    // reset the private fields
    virtual void reset();

protected:

    // <group>
    // Get the counts of data within the specified histogram bins. The number of
    // arrays within binCounts will be equal to the number of histograms in
    // <src>hist</src>. Each array within <src>binCounts</src> will have the
    // same number of elements as the number of bins in its corresponding
    // histogram in <src>hist</src>.
    // The overloads differ only in which of weights, mask and ranges are
    // supplied, as labelled on each one.
    // NOTE(review): <src>sameVal</src>, <src>allSame</src> and
    // <src>maxLimit</src> are not described here; presumably they have one
    // entry per histogram -- confirm in the implementation.

    // no weights, no mask, no ranges
    virtual void _findBins(
        std::vector<std::vector<uInt64>>& binCounts,
        std::vector<std::shared_ptr<AccumType>>& sameVal,
        std::vector<Bool>& allSame, const DataIterator& dataBegin, uInt64 nr,
        uInt dataStride, const std::vector<StatsHistogram<AccumType>>& hist,
        const std::vector<AccumType>& maxLimit
    ) const;

    // ranges
    virtual void _findBins(
        std::vector<std::vector<uInt64>>& binCounts,
        std::vector<std::shared_ptr<AccumType>>& sameVal,
        std::vector<Bool>& allSame, const DataIterator& dataBegin, uInt64 nr,
        uInt dataStride, const DataRanges& ranges, Bool isInclude,
        const std::vector<StatsHistogram<AccumType>>& hist,
        const std::vector<AccumType>& maxLimit
    ) const;

    // mask
    virtual void _findBins(
        std::vector<std::vector<uInt64>>& binCounts,
        std::vector<std::shared_ptr<AccumType>>& sameVal,
        std::vector<Bool>& allSame, const DataIterator& dataBegin, uInt64 nr,
        uInt dataStride, const MaskIterator& maskBegin, uInt maskStride,
        const std::vector<StatsHistogram<AccumType>>& hist,
        const std::vector<AccumType>& maxLimit
    ) const;

    // mask and ranges
    virtual void _findBins(
        std::vector<std::vector<uInt64>>& binCounts,
        std::vector<std::shared_ptr<AccumType>>& sameVal,
        std::vector<Bool>& allSame, const DataIterator& dataBegin, uInt64 nr,
        uInt dataStride, const MaskIterator& maskBegin, uInt maskStride,
        const DataRanges& ranges, Bool isInclude,
        const std::vector<StatsHistogram<AccumType>>& hist,
        const std::vector<AccumType>& maxLimit
    ) const;

    // weights
    virtual void _findBins(
        std::vector<std::vector<uInt64>>& binCounts,
        std::vector<std::shared_ptr<AccumType>>& sameVal,
        std::vector<Bool>& allSame, const DataIterator& dataBegin,
        const WeightsIterator& weightsBegin, uInt64 nr, uInt dataStride,
        const std::vector<StatsHistogram<AccumType>>& hist,
        const std::vector<AccumType>& maxLimit
    ) const ;

    // weights and ranges
    virtual void _findBins(
        std::vector<std::vector<uInt64>>& binCounts,
        std::vector<std::shared_ptr<AccumType>>& sameVal,
        std::vector<Bool>& allSame, const DataIterator& dataBegin,
        const WeightsIterator& weightsBegin, uInt64 nr, uInt dataStride,
        const DataRanges& ranges, Bool isInclude,
        const std::vector<StatsHistogram<AccumType>>& hist,
        const std::vector<AccumType>& maxLimit
    ) const;

    // weights, mask, ranges
    virtual void _findBins(
        std::vector<std::vector<uInt64>>& binCounts,
        std::vector<std::shared_ptr<AccumType>>& sameVal,
        std::vector<Bool>& allSame, const DataIterator& dataBegin,
        const WeightsIterator& weightsBegin, uInt64 nr, uInt dataStride,
        const MaskIterator& maskBegin, uInt maskStride,
        const DataRanges& ranges, Bool isInclude,
        const std::vector<StatsHistogram<AccumType>>& hist,
        const std::vector<AccumType>& maxLimit
    ) const;

    // weights and mask
    virtual void _findBins(
        std::vector<std::vector<uInt64>>& binCounts,
        std::vector<std::shared_ptr<AccumType>>& sameVal,
        std::vector<Bool>& allSame, const DataIterator& dataBegin,
        const WeightsIterator& weightBegin, uInt64 nr, uInt dataStride,
        const MaskIterator& maskBegin, uInt maskStride,
        const std::vector<StatsHistogram<AccumType>>& hist,
        const std::vector<AccumType>& maxLimit
    ) const;
    // </group>

    //<group>
    // populate an unsorted array with valid data. The overloads differ only in
    // which of weights, mask and ranges are supplied, as labelled on each one.

    // no weights, no mask, no ranges
    virtual void _populateArray(
        std::vector<AccumType>& ary, const DataIterator& dataBegin, uInt64 nr,
        uInt dataStride
    ) const;

    // ranges
    virtual void _populateArray(
        std::vector<AccumType>& ary, const DataIterator& dataBegin, uInt64 nr,
        uInt dataStride, const DataRanges& ranges, Bool isInclude
    ) const;

    // mask
    virtual void _populateArray(
        std::vector<AccumType>& ary, const DataIterator& dataBegin,
        uInt64 nr, uInt dataStride, const MaskIterator& maskBegin,
        uInt maskStride
    ) const;

    // mask and ranges
    virtual void _populateArray(
        std::vector<AccumType>& ary, const DataIterator& dataBegin, uInt64 nr,
        uInt dataStride, const MaskIterator& maskBegin, uInt maskStride,
        const DataRanges& ranges, Bool isInclude
    ) const;

    // weights
    virtual void _populateArray(
        std::vector<AccumType>& ary, const DataIterator& dataBegin,
        const WeightsIterator& weightsBegin, uInt64 nr, uInt dataStride
    ) const;

    // weights and ranges
    virtual void _populateArray(
        std::vector<AccumType>& ary, const DataIterator& dataBegin,
        const WeightsIterator& weightsBegin, uInt64 nr, uInt dataStride,
        const DataRanges& ranges, Bool isInclude
    ) const;

    // weights and mask
    virtual void _populateArray(
        std::vector<AccumType>& ary, const DataIterator& dataBegin,
        const WeightsIterator& weightBegin, uInt64 nr, uInt dataStride,
        const MaskIterator& maskBegin, uInt maskStride
    ) const;

    // weights, mask, ranges
    virtual void _populateArray(
        std::vector<AccumType>& ary, const DataIterator& dataBegin,
        const WeightsIterator& weightBegin, uInt64 nr, uInt dataStride,
        const MaskIterator& maskBegin, uInt maskStride,
        const DataRanges& ranges, Bool isInclude
    ) const;
    // </group>

    // <group>
    // Create a std::vector of unsorted arrays, one array for each bin defined
    // by <src>includeLimits</src>. <src>includeLimits</src> should be
    // non-overlapping and should be given in ascending order (the algorithm
    // used assumes this). Once the sum of the lengths of all arrays equals
    // <src>maxCount</src> the method will return with no further processing.
    // NOTE(review): <src>currentCount</src> is passed by non-const reference;
    // presumably it carries the running sum of the array lengths across
    // calls -- confirm in the implementation.

    // no weights, no mask, no ranges
    virtual void _populateArrays(
        std::vector<std::vector<AccumType>>& arys, uInt64& currentCount,
        const DataIterator& dataBegin, uInt64 nr, uInt dataStride,
        const IncludeLimits& includeLimits, uInt64 maxCount
    ) const;

    // ranges
    virtual void _populateArrays(
        std::vector<std::vector<AccumType>>& arys, uInt64& currentCount,
        const DataIterator& dataBegin, uInt64 nr, uInt dataStride,
        const DataRanges& ranges, Bool isInclude,
        const IncludeLimits& includeLimits, uInt64 maxCount
    ) const;

    // mask
    virtual void _populateArrays(
        std::vector<std::vector<AccumType>>& arys, uInt64& currentCount,
        const DataIterator& dataBegin, uInt64 nr, uInt dataStride,
        const MaskIterator& maskBegin, uInt maskStride,
        const IncludeLimits& includeLimits, uInt64 maxCount
    ) const;

    // mask and ranges
    virtual void _populateArrays(
        std::vector<std::vector<AccumType>>& arys, uInt64& currentCount,
        const DataIterator& dataBegin, uInt64 nr, uInt dataStride,
        const MaskIterator& maskBegin, uInt maskStride,
        const DataRanges& ranges, Bool isInclude,
        const IncludeLimits& includeLimits, uInt64 maxCount
    ) const;

    // weights
    virtual void _populateArrays(
        std::vector<std::vector<AccumType>>& arys, uInt64& currentCount,
        const DataIterator& dataBegin, const WeightsIterator& weightsBegin,
        uInt64 nr, uInt dataStride, const IncludeLimits& includeLimits,
        uInt64 maxCount
    ) const;

    // weights and ranges
    virtual void _populateArrays(
        std::vector<std::vector<AccumType>>& arys, uInt64& currentCount,
        const DataIterator& dataBegin, const WeightsIterator& weightsBegin,
        uInt64 nr, uInt dataStride, const DataRanges& ranges, Bool isInclude,
        const IncludeLimits& includeLimits, uInt64 maxCount
    ) const;

    // weights and mask
    virtual void _populateArrays(
        std::vector<std::vector<AccumType>>& arys, uInt64& currentCount,
        const DataIterator& dataBegin, const WeightsIterator& weightBegin,
        uInt64 nr, uInt dataStride, const MaskIterator& maskBegin,
        uInt maskStride, const IncludeLimits& includeLimits, uInt64 maxCount
    ) const;

    // weights, mask, ranges
    virtual void _populateArrays(
        std::vector<std::vector<AccumType>>& arys, uInt64& currentCount,
        const DataIterator& dataBegin, const WeightsIterator& weightBegin,
        uInt64 nr, uInt dataStride, const MaskIterator& maskBegin,
        uInt maskStride, const DataRanges& ranges, Bool isInclude,
        const IncludeLimits& includeLimits, uInt64 maxCount
    ) const;
    // </group>

    // <group>
    // Populate <src>ary</src> subject to the <src>maxElements</src> limit. The
    // overloads differ only in which of weights, mask and ranges are supplied,
    // as labelled on each one.
    // NOTE(review): neither the meaning of the Bool return value nor the exact
    // role of <src>maxElements</src> is documented in this header; presumably
    // the return value reports whether the <src>maxElements</src> limit was
    // hit -- confirm in the .tcc implementation.

    // no weights, no mask, no ranges
    virtual Bool _populateTestArray(
        std::vector<AccumType>& ary, const DataIterator& dataBegin,
        uInt64 nr, uInt dataStride, uInt maxElements
    ) const;

    // ranges
    virtual Bool _populateTestArray(
        std::vector<AccumType>& ary, const DataIterator& dataBegin, uInt64 nr,
        uInt dataStride, const DataRanges& ranges, Bool isInclude,
        uInt maxElements
    ) const;

    // mask
    virtual Bool _populateTestArray(
        std::vector<AccumType>& ary, const DataIterator& dataBegin,
        uInt64 nr, uInt dataStride, const MaskIterator& maskBegin,
        uInt maskStride, uInt maxElements
    ) const;

    // mask and ranges
    virtual Bool _populateTestArray(
        std::vector<AccumType>& ary, const DataIterator& dataBegin, uInt64 nr,
        uInt dataStride, const MaskIterator& maskBegin, uInt maskStride,
        const DataRanges& ranges, Bool isInclude, uInt maxElements
    ) const;

    // weights
    virtual Bool _populateTestArray(
        std::vector<AccumType>& ary, const DataIterator& dataBegin,
        const WeightsIterator& weightBegin, uInt64 nr, uInt dataStride,
        uInt maxElements
    ) const;

    // weights and ranges
    virtual Bool _populateTestArray(
        std::vector<AccumType>& ary, const DataIterator& dataBegin,
        const WeightsIterator& weightsBegin, uInt64 nr, uInt dataStride,
        const DataRanges& ranges, Bool isInclude, uInt maxElements
    ) const;

    // weights and mask
    virtual Bool _populateTestArray(
        std::vector<AccumType>& ary, const DataIterator& dataBegin,
        const WeightsIterator& weightBegin, uInt64 nr, uInt dataStride,
        const MaskIterator& maskBegin, uInt maskStride, uInt maxElements
    ) const;

    // weights, mask, ranges
    virtual Bool _populateTestArray(
        std::vector<AccumType>& ary, const DataIterator& dataBegin,
        const WeightsIterator& weightBegin, uInt64 nr, uInt dataStride,
        const MaskIterator& maskBegin, uInt maskStride,
        const DataRanges& ranges, Bool isInclude, uInt maxElements
    ) const;
    // </group>

    // get values from sorted array if the array is small enough to be held in
    // memory. Note that this is the array containing all good data, not data in
    // just a single bin representing a subset of good data.
    // Returns True if the data were successfully retrieved.
    // If True is returned, the values map will contain a map of index to value.
    // It is the caller's responsibility to check that <src>mynpts</src> is not
    // 0; no checking is done here.
    // NOTE(review): presumably <src>maxArraySize</src> is the threshold for
    // "small enough" -- confirm in the implementation.
    Bool _valuesFromSortedArray(
        std::map<uInt64, AccumType>& values, uInt64 mynpts,
        const std::set<uInt64>& indices, uInt64 maxArraySize,
        Bool persistSortedArray
    );

private:

    // Initialized to False.
    // NOTE(review): presumably set while getMedianAbsDevMed() is running so
    // that the data-scanning methods use deviations about _myMedian -- confirm
    // in the implementation.
    Bool _doMedAbsDevMed{False};
    // for use in often repeatedly run macros
    AccumType _myMedian{0};

    // tally the number of data points that fall into each bin provided by
    // <src>hist</src>. Any points that are less than hist.minLimit or greater
    // than hist.minLimit + hist.nBins*hist.binWidth are not included in the
    // counts. A data point that falls exactly on a bin boundary is considered
    // to be in the higher index bin. <src>sameVal</src> will be non-null if all
    // the good values in the histogram range are the same. In that case, the
    // value held will be the value of each of those data points.
    std::vector<std::vector<uInt64>> _binCounts(
        std::vector<std::shared_ptr<AccumType>>& sameVal,
        const std::vector<StatsHistogram<AccumType>>& hist
    );

    // Helper taking the data, mask and weights iterators of one chunk together
    // with the chunk description and the bin-counting outputs.
    // NOTE(review): presumably selects the _findBins overload that matches
    // what <src>chunk</src> provides -- confirm in the implementation.
    void _computeBins(
        std::vector<std::vector<uInt64>>& bins,
        std::vector<std::shared_ptr<AccumType>>& sameVal,
        std::vector<Bool>& allSame, DataIterator dataIter,
        MaskIterator maskIter, WeightsIterator weightsIter, uInt64 count,
        const std::vector<StatsHistogram<AccumType>>& hist,
        const std::vector<AccumType>& maxLimit,
        const typename StatisticsDataset<CASA_STATP>::ChunkData& chunk
    );

    // Helper taking the data, mask and weights iterators of one chunk together
    // with the chunk description and the array to fill.
    // NOTE(review): presumably selects the matching _populateArray overload --
    // confirm in the implementation.
    void _computeDataArray(
        std::vector<AccumType>& ary, DataIterator dataIter,
        MaskIterator maskIter, WeightsIterator weightsIter, uInt64 dataCount,
        const typename StatisticsDataset<CASA_STATP>::ChunkData& chunk
    );

    // Helper taking the data, mask and weights iterators of one chunk together
    // with the chunk description, the include limits and the arrays to fill.
    // NOTE(review): presumably selects the matching _populateArrays overload --
    // confirm in the implementation.
    void _computeDataArrays(
        std::vector<std::vector<AccumType>>& arys, uInt64& currentCount,
        DataIterator dataIter, MaskIterator maskIter,
        WeightsIterator weightsIter, uInt64 dataCount,
        const IncludeLimits& includeLimits, uInt64 maxCount,
        const typename StatisticsDataset<CASA_STATP>::ChunkData& chunk
    );

    // Create an unsorted array of the complete data set. If
    // <src>includeLimits</src> is specified, only points within those limits
    // (including min but excluding max, as per definition of bins), are
    // included. Only _createDataArrays() takes <src>includeLimits</src>.
    void _createDataArray(std::vector<AccumType>& array);

    void _createDataArrays(
        std::vector<std::vector<AccumType>>& arrays,
        const IncludeLimits& includeLimits, uInt64 maxCount
    );

    // extract data from multiple histograms given by <src>hist</src>.
    // <src>dataIndices</src> represent the indices of the sorted arrays of
    // values to extract. There should be exactly one set of data indices to
    // extract for each supplied histogram. The data indices are relative to the
    // minimum value of the minimum bin in their respective histograms. The
    // ordering of the maps in the returned std::vector represent the ordering
    // of histograms in <src>hist</src>. <src>hist</src> should contain
    // non-overlapping histograms and the histograms should be specified in
    // ascending order.
    std::vector<IndexValueMap> _dataFromMultipleBins(
        const std::vector<StatsHistogram<AccumType>>& hist,
        uInt64 maxArraySize, const std::vector<IndexSet>& dataIndices,
        uInt nBins
    );

    // Returns a vector of index-to-value maps.
    // NOTE(review): not documented in this header; presumably the counterpart
    // of _dataFromMultipleBins() for individual bins described by
    // <src>binNpts</src> and <src>binLimits</src>, with one entry of
    // <src>dataIndices</src> per bin -- confirm in the implementation.
    std::vector<IndexValueMap> _dataFromSingleBins(
        const std::vector<uInt64>& binNpts, uInt64 maxArraySize,
        const IncludeLimits& binLimits,
        const std::vector<IndexSet>& dataIndices, uInt nBins
    );

    // get the values for the specified indices in the sorted array of all good
    // data
    IndexValueMap _indicesToValues(
        uInt64 mynpts, AccumType mymin, AccumType mymax, uInt64 maxArraySize,
        const IndexSet& dataIndices, Bool persistSortedArray, uInt nBins
    );

    // get the index (for odd npts) or indices (for even npts) of the median of
    // the sorted array.
    static IndexSet _medianIndices(uInt64 mynpts);

};

}

#ifndef CASACORE_NO_AUTO_TEMPLATES
#include <casacore/scimath/StatsFramework/ClassicalQuantileComputer.tcc>
#endif 

#endif