// File: SimpleDataset.h
//
// Package info: ausaxs 1.1.8-1
//   - links: PTS, VCS
//   - area: main
//   - in suites: sid
//   - size: 72,592 kB
//   - sloc: cpp: 49,853; ansic: 6,901; python: 730; makefile: 18
// File content: 221 lines, 7,704 bytes (stat: -rw-r--r--)
// SPDX-License-Identifier: LGPL-3.0-or-later
// Author: Kristian Lytje

#pragma once

#include <hist/HistFwd.h>

#include <dataset/PointSet.h>
#include <dataset/Dataset.h>

namespace ausaxs {
    /**
     * @brief A simple dataset is a collection of points of the form x | y | yerr.
     *
     * The y-errors are the third column (index 2), as used by the yerr() accessors.
     * Apart from those accessors, the member functions are only declared here; their
     * definitions are not part of this header.
     */
    class SimpleDataset : public Dataset {
        protected: 
            /**
             * @brief Construct a dataset with N rows and M columns. 
             *        This is protected because it should only be used by derived classes for supporting more columns.
             *
             * @param N The number of rows.
             * @param M The number of columns.
             */
            SimpleDataset(unsigned int N, unsigned int M);

        public: 
            // Default constructor.
            SimpleDataset();
            // Copy constructor.
            SimpleDataset(const SimpleDataset& d);
            // Move constructor.
            // NOTE(review): unlike the move assignment operator below, this is not declared noexcept.
            SimpleDataset(SimpleDataset&& d);
            // Copy assignment operator.
            SimpleDataset& operator=(const SimpleDataset& other);
            // Move assignment operator.
            SimpleDataset& operator=(SimpleDataset&& other) noexcept;
            // Destructor; overrides the virtual destructor of the base class.
            ~SimpleDataset() override;

            // Construct a dataset from a histogram.
            // This constructor is not explicit, so a hist::Histogram converts implicitly to a SimpleDataset.
            SimpleDataset(const hist::Histogram& h);
            // Construct a simple dataset from a general Dataset.
            // This constructor is not explicit, so a Dataset converts implicitly to a SimpleDataset.
            // NOTE(review): how a differing number of columns is handled is not visible from this header.
            SimpleDataset(const Dataset& d);

            /**
             * @brief Construct a new empty dataset with the given number of rows. 
             *
             * This constructor is not explicit, so an unsigned integer converts implicitly to a SimpleDataset.
             * NOTE(review): it is declared noexcept; if the implementation allocates storage, an allocation
             *               failure would terminate the program instead of throwing — confirm this is intended.
             *
             * @param rows The number of rows.
             */
            SimpleDataset(unsigned int rows) noexcept;

            /**
             * @brief Construct a new dataset from two vectors.
             *
             * NOTE(review): the contents of the yerr column and the handling of vectors of different
             *               lengths are decided by the implementation — confirm there.
             *
             * @param x The x-values.
             * @param y The y-values.
             */
            SimpleDataset(const std::vector<double>& x, const std::vector<double>& y);

            /**
             * @brief Construct a new dataset from three vectors.
             *
             * @param x The x-values.
             * @param y The y-values.
             * @param yerr The errors on the y-values.
             */
            SimpleDataset(const std::vector<double>& x, const std::vector<double>& y, const std::vector<double>& yerr);

            /**
             * @brief Construct a new dataset from an input file.
             *
             * @param path The file to read the data from.
             */
            SimpleDataset(const io::ExistingFile& path);

            // Get a read-only view of the third column (index 2), i.e. the y-errors.
            [[nodiscard]] const ConstColumn<double> yerr() const {return col(2);}

            // Get a mutable view of the third column (index 2), i.e. the y-errors.
            [[nodiscard]] MutableColumn<double> yerr() {return col(2);}

            // Get a read-only reference to the ith value in the third column, i.e. the y-error of row i.
            [[nodiscard]] const double& yerr(unsigned int i) const {return data.index(i, 2);}

            // Get a mutable reference to the ith value in the third column, i.e. the y-error of row i.
            [[nodiscard]] double& yerr(unsigned int i) {return data.index(i, 2);}

            /**
             * @brief Load a dataset from the specified file. 
             *        This overrides the corresponding virtual function of the base class.
             *
             * @param path The file to read the data from.
             */
            virtual void load(const io::ExistingFile& path) override;

            /**
             * @brief Reduce the number of rows to the specified amount by uniformly removing points in x-space.
             * 
             * @param target The target number of points.
             * @param log If true, the points will be removed uniformly on a logarithmic scale. Defaults to false.
             */
            void reduce(unsigned int target, bool log = false);

            /**
             * @brief Assign a Matrix to this dataset.
             *
             * Note that this operator returns void, so such an assignment cannot be chained.
             *
             * @param other The matrix to take the data from.
             */
            void operator=(Matrix<double>&& other);
            
            // Compare this dataset with another.
            // NOTE(review): what exactly is compared is defined by the implementation.
            bool operator==(const SimpleDataset& other) const;

            /**
             * @brief Get the spanned x-range. 
             */
            [[nodiscard]] Limit span_x() const noexcept;

            /**
             * @brief Get the spanned y-range. 
             */
            [[nodiscard]] Limit span_y() const noexcept;

            /**
             * @brief Get the spanned x-range.
             *
             * NOTE(review): documented identically to span_x(); whether the two differ is not visible from this header.
             */
            [[nodiscard]] Limit get_xlimits() const noexcept;

            /**
             * @brief Get the spanned y-range.
             *
             * NOTE(review): documented identically to span_y(); whether the two differ is not visible from this header.
             */
            [[nodiscard]] Limit get_ylimits() const noexcept;

            /**
             * @brief Get the positive spanned y-range.
             *        This can be useful for setting log ranges. 
             */
            [[nodiscard]] Limit span_y_positive() const noexcept;

            /**
             * @brief Add a new point at the end of the dataset.
             *
             * @param x The x-value of the new point.
             * @param y The y-value of the new point.
             * @param yerr The error on the y-value. Defaults to 0.
             */
            void push_back(double x, double y, double yerr = 0);

            /**
             * @brief Set the normalization of the y-values. The first y-value will be fixed to this. 
             * 
             * @param y0 The value the first y-value is fixed to. Defaults to 1.
             *
             * @return The normalization factor which all entries were multiplied by. 
             */
            double normalize(double y0 = 1);

            /**
             * @brief Scale all errors by some common factor. 
             *        This function is virtual, so derived classes may override it.
             *
             * @param factor The factor to scale the errors by.
             */
            virtual void scale_errors(double factor);

            /**
             * @brief Scale the y-values (and their associated errors) by some common factor.
             *
             * @param factor The factor to scale by.
             */
            void scale_y(double factor);

            /**
             * @brief Simulate Gaussian noise on the y-values based on the errors. 
             */
            void simulate_noise();

            /**
             * @brief Generate errors for the y-values mimicking what one would find experimentally. 
             */
            void simulate_errors();

            /**
             * @brief Get the point at a given index.
             *
             * @param index The row index of the point.
             */
            Point2D get_point(unsigned int index) const;

            /**
             * @brief Get the point with the smallest y-value.
             */
            Point2D find_minimum() const;

            /**
             * @brief Add a new datapoint to the end of this dataset. 
             *
             * @param point The point to append.
             */
            void push_back(const Point2D& point) noexcept;

            /**
             * @brief Rebin the data to a logarithmic scale. 
             *        This follows the typical rebinning algorithm used experimentally.
             */
            void rebin() noexcept;

            /**
             * @brief Generate a randomized dataset.
             * 
             * x-values will span from 0 to size-1.
             * y-values will be generated in the range [min, max]. 
             * yerr-values will be generated in the range 0.1[min, max].
             * 
             * @param size Size of the dataset.
             * @param min Minimum generated value.
             * @param max Maximum generated value. 
             */
            static SimpleDataset generate_random_data(unsigned int size, double min, double max);

            /**
             * @brief Generate a randomized dataset.
             * 
             * x-values will span from 0 to size-1.
             * y-values will be generated in the range [-val, val]. 
             * yerr-values will be generated in the range 0.1[-val, val].
             * 
             * @param size Size of the dataset.
             * @param val Maximum and minimum bound on the generated values. 
             */
            static SimpleDataset generate_random_data(unsigned int size, double val);

            /**
             * @brief Get the mean of the y values.
             */
            [[nodiscard]] double mean() const;

            /**
             * @brief Get the weighted mean of the y values.
             */
            [[nodiscard]] double weighted_mean() const;

            /**
             * @brief Get the standard deviation of the y values.
             */
            [[nodiscard]] double std() const;

            /**
             * @brief Get the weighted-mean error of the y values.
             *
             * NOTE(review): this was previously described as the "weighted standard deviation of the y values";
             *               the function name suggests the uncertainty on weighted_mean() instead — confirm
             *               against the implementation.
             */
            [[nodiscard]] double weighted_mean_error() const;

            /**
             * @brief Removes consecutive duplicate y-values.
             */
            void remove_consecutive_duplicates();

            // Make the assignment operators of the base class visible here;
            // they would otherwise be hidden by the operator= overloads declared in this class.
            using Dataset::operator=;

        private:
            // Initialization helpers taking the same vector arguments as the two- and three-vector constructors.
            // NOTE(review): presumably called by those constructors — confirm in the implementation.
            void initialize(const std::vector<double>& x, const std::vector<double>& y);
            void initialize(const std::vector<double>& x, const std::vector<double>& y, const std::vector<double>& yerr);
    };
}