File: functionsCalculate.cpp

package info (click to toggle)
groops 0%2Bgit20250907%2Bds-1
  • links: PTS, VCS
  • area: non-free
  • in suites: forky, sid
  • size: 11,140 kB
  • sloc: cpp: 135,607; fortran: 1,603; makefile: 20
file content (238 lines) | stat: -rw-r--r-- 10,377 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
/***********************************************/
/**
* @file functionsCalculate.cpp
*
* @brief Functions Calculate.
*
* @author Torsten Mayer-Guerr
* @date 2009-09-11
*
*/
/***********************************************/

// Latex documentation
#define DOCSTRING docstring
static const char *docstring = R"(
This program manipulates \file{matrix files}{matrix} with data in columns.
If several \config{inputfile}s are given the data columns are copied side by side.
All \config{inputfile}s must contain the same number of rows.
The columns are enumerated by \verb|data0|,~\verb|data1|,~\ldots.

The content of \configFile{outputfile}{matrix} is controlled by \config{outColumn}.
The algorithm to compute the output is as follows:
The expressions in \config{outColumn} are evaluated once for each row of the input.
The variables \verb|data0|,~\verb|data1|,~\ldots are replaced by the according values from the input columns before.
Additional variables are available, e.g. \verb|index|, \verb|data0rms|, see~\reference{dataVariables}{general.parser:dataVariables}.
If no \config{outColumn} are specified all input columns are used instead directly.

For a simplified handling \config{constant}s can be defined by \verb|name=value|, e.g. \verb|annual=365.25|.
It is also possible to estimate \config{parameter}s in a least squares adjustment.
The \config{leastSquares} serves as template for observation equations for every row.
The expression \config{leastSquares} is evaluated for each row in the \config{inputfile}.
The variables \verb|data0|,~\verb|data1|,~\ldots are replaced by the according values from the input columns before.
In the next step the parameters are estimated in order to minimize the expressions in \config{leastSquares}
in the sense of least squares.

Afterwards complete rows are removed if one of the \config{removalCriteria} expressions for this row evaluates true (not zero).

An extra \config{statistics} file can be generated with one row of data. For the computation of the \config{outColumn} values
all~\reference{dataVariables}{general.parser:dataVariables} are available (e.g. \verb|data3mean|, \verb|data4std|)
inclusively the \config{constant}s and estimated \config{parameter}s but without the \verb|data0|,~\verb|data1|,~\ldots itself.
The variables and the numbering of the columns refers to the \configFile{outputfile}{matrix}.

First example: To calculate the mean of two values at each row set \config{outColumn} to \verb|0.5*(data1+data0)|.

Second example: An input file contain a column with times and a column with values.
To remove a trend from the values define the \config{parameter}s \verb|trend| and \verb|bias|.
The observation equation in \config{leastSquares} is \verb|data1 - (trend*data0+bias)|.
For output you can define the following columns for example:
\begin{itemize}
\item \config{outColumn}=\verb|data0|: points in time.
\item \config{outColumn}=\verb|data1|: the values itself.
\item \config{outColumn}=\verb|trend*data0+bias|: the linear fit.
\item \config{outColumn}=\verb|data1-trend*data0-bias|: the residuals.
\end{itemize}
The extra statistics file could contain in this case:
\begin{itemize}
\item \config{outColumn}=\verb|data0max-data0min|: time span.
\item \config{outColumn}=\verb|bias|: estimated parameter.
\item \config{outColumn}=\verb|trend|: estimated parameter.
\item \config{outColumn}=\verb|data3rms|: root mean square of the residuals.
\end{itemize}

See also \program{InstrumentArcCalculate}, \program{GriddedDataCalculate}, \program{MatrixCalculate}.
)";

/***********************************************/

#include "programs/program.h"
#include "parser/dataVariables.h"
#include "files/fileMatrix.h"

/***********************************************/

/** @brief Functions Calculate.
* @ingroup programsGroup */
class FunctionsCalculate
{
public:
  void run(Config &config, Parallel::CommunicatorPtr comm);
};

GROOPS_REGISTER_PROGRAM(FunctionsCalculate, SINGLEPROCESS, "Functions Calulate", Misc, Matrix, TimeSeries)

/***********************************************/

void FunctionsCalculate::run(Config &config, Parallel::CommunicatorPtr /*comm*/)
{
  try
  {
    FileName                           fileNameOut, fileNameStatistics;
    std::vector<FileName>              fileNamesIn;
    std::vector<ExpressionVariablePtr> constExpr, paramExpr;
    std::vector<ExpressionVariablePtr> lsaExpr, removeExpr;
    std::vector<ExpressionVariablePtr> outExpr, statisticsExpr;

    readConfig(config, "outputfile",      fileNameOut, Config::OPTIONAL, "",      "");
    readConfig(config, "inputfile",       fileNamesIn, Config::MUSTSET,  "",      "");
    readConfig(config, "constant",        constExpr,   Config::OPTIONAL, "",      "define a constant by name=value");
    readConfig(config, "parameter",       paramExpr,   Config::OPTIONAL, "",      "define a parameter by name[=value]");
    readConfig(config, "leastSquares",    lsaExpr,     Config::OPTIONAL, "",      "try to minimize the expression by adjustment of the parameters");
    readConfig(config, "removalCriteria", removeExpr,  Config::OPTIONAL, "",      "row is removed if one criterion evaluates true.");
    readConfig(config, "outColumn",       outExpr,     Config::OPTIONAL, R"(["data0"])", "expression to compute output columns (input columns are named data0, data1, ...)");
    if(readConfigSequence(config, "statistics", Config::OPTIONAL, "", ""))
    {
      readConfig(config, "outputfile", fileNameStatistics, Config::MUSTSET, "",         "matrix with one row, columns are user defined");
      readConfig(config, "outColumn",  statisticsExpr,     Config::MUSTSET, "data0rms", "expression to compute statistics columns, data* are the outputColumns");
      endSequence(config);
    }
    if(isCreateSchema(config)) return;

    // read data
    // ---------
    Matrix data;
    {
      std::vector<Matrix> data2(fileNamesIn.size());
      for(UInt i=0; i<fileNamesIn.size(); i++)
      {
        logStatus<<"read input from <"<<fileNamesIn.at(i)<<">"<<Log::endl;
        readFileMatrix(fileNamesIn.at(i), data2.at(i));
      }
      // test data
      for(UInt i=1; i<data2.size(); i++)
        if(data2.at(i).rows() != data2.at(0).rows())
          throw(Exception("all input data must have the same count of rows"));
      UInt cols = 0;
      for(UInt i=0; i<data2.size(); i++)
        cols += data2.at(i).columns();
      data = Matrix(data2.at(0).rows(), cols);
      UInt idx = 0;
      for(UInt i=0; i<data2.size(); i++)
      {
        copy(data2.at(i), data.column(idx,data2.at(i).columns()));
        idx += data2.at(i).columns();
      }
    }

    // create data variables
    // ---------------------
    // get real variable names, otherwise all named after config element
    std::for_each(constExpr.begin(), constExpr.end(), [&](auto expr) {expr->parseVariableName();});
    std::for_each(paramExpr.begin(), paramExpr.end(), [&](auto expr) {expr->parseVariableName();});

    VariableList varList;
    std::for_each(constExpr.begin(), constExpr.end(), [&](auto expr) {varList.addVariable(expr);});
    std::for_each(paramExpr.begin(), paramExpr.end(), [&](auto expr) {varList.addVariable(expr);});
    auto varListWoData = varList;
    addDataVariables(data, varList);

    // build observation equations
    // ---------------------------
    if(lsaExpr.size())
    {
      logStatus<<"least squares adjustment"<<Log::endl;
      Vector l(data.rows()*lsaExpr.size());
      Matrix A(data.rows()*lsaExpr.size(), paramExpr.size());

      for(UInt k=0; k<lsaExpr.size(); k++)
      {
        std::vector<ExpressionVariablePtr> lsaA(paramExpr.size());
        for(UInt s=0; s<paramExpr.size(); s++)
        {
          lsaA.at(s) = lsaExpr.at(k)->derivative(paramExpr.at(s)->name(), varList);
          lsaA.at(s)->simplify(varList);
        }
        lsaExpr.at(k)->simplify(varList);

        for(UInt i=0; i<data.rows(); i++)
        {
          evaluateDataVariables(data, i, varList);
          l(i+k*data.rows()) = -lsaExpr.at(k)->evaluate(varList);  // observations
          for(UInt s=0; s<lsaA.size(); s++)
            A(i+k*data.rows(), s) = lsaA.at(s)->evaluate(varList);  // columns of design matrix
        }
        undefineDataVariables(data, varList);
      }

      Vector x = leastSquares(A,l);
      for(UInt s=0; s<paramExpr.size(); s++)
      {
        x(s) += paramExpr.at(s)->evaluate(varList);
        paramExpr.at(s)->setValue( x(s) );
        varList.setVariable(paramExpr.at(s)->name(),  x(s) );
        varListWoData.setVariable(paramExpr.at(s)->name(),  x(s) );
        logInfo<<"  "<<paramExpr.at(s)->name()<<" = "<<x(s)<<Log::endl;
      }
    }

    // calculate output matrix
    // -----------------------
    logStatus<<"calculate output matrix"<<Log::endl;
    std::for_each(outExpr.begin(),    outExpr.end(),    [&](auto expr) {expr->simplify(varList);});
    std::for_each(removeExpr.begin(), removeExpr.end(), [&](auto expr) {expr->simplify(varList);});
    Matrix outData(data.rows(), outExpr.size() ? outExpr.size() : data.columns());
    UInt row = 0;
    for(UInt i=0; i<outData.rows(); i++)
    {
      evaluateDataVariables(data, i, varList);
      if(!std::any_of(removeExpr.begin(), removeExpr.end(), [&](auto expr) {return expr->evaluate(varList) != 0;}))
      {
        for(UInt k=0; k<outData.columns(); k++)
          outData(row, k) = outExpr.size() ? outExpr.at(k)->evaluate(varList) : data(i, k);
        row++;
      }
    }
    if(row < outData.rows())
    {
      logInfo<<"  "<<outData.rows()-row<<" rows removed"<<Log::endl;
      outData = outData.row(0, row);
    }

    // write output
    // ------------
    if(!fileNameOut.empty())
    {
      logStatus<<"write output to <"<<fileNameOut<<">"<<Log::endl;
      writeFileMatrix(fileNameOut, outData);
    }

    // statistics
    // ----------
    if(!fileNameStatistics.empty())
    {
      logStatus<<"write statistics to <"<<fileNameStatistics<<">"<<Log::endl;
      auto varList = varListWoData;
      addDataVariables(outData, varList);
      Matrix statistics(1, statisticsExpr.size());
      for(UInt k=0; k<statistics.columns(); k++)
        statistics(0, k) = statisticsExpr.at(k)->evaluate(varList);
      writeFileMatrix(fileNameStatistics, statistics);
    }
  }
  catch(std::exception &e)
  {
    GROOPS_RETHROW(e)
  }
}

/***********************************************/