File: instrumentArcStatistics.cpp

package info (click to toggle)
groops 0%2Bgit20250907%2Bds-1
  • links: PTS, VCS
  • area: non-free
  • in suites: forky, sid
  • size: 11,140 kB
  • sloc: cpp: 135,607; fortran: 1,603; makefile: 20
file content (127 lines) | stat: -rw-r--r-- 5,473 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
/***********************************************/
/**
* @file instrumentArcStatistics.cpp
*
* @brief Compute statistics for arcs of instrument data.
*
* @author Sebastian Strasser
* @author Matthias Ellmer
* @author Torsten Mayer-Guerr
* @date 2016-07-18
*/
/***********************************************/

// Latex documentation
#define DOCSTRING docstring
static const char *docstring = R"(
Computes statistics of selected data columns of \configFile{inputfileInstrument}{instrument} arc wise.
The \configFile{outputfileStatisticsTimeSeries}{instrument} contains for every arc one (mid) epoch
with statistics column(s). Possible statistics are root mean square, standard deviation,
mean, median, min, and max.

With \config{perColumn} separate statistics for each selected data column are computed,
otherwise an overall value is computed.

See also \program{InstrumentArcCrossStatistics}, \program{InstrumentStatisticsTimeSeries}.
)";

/***********************************************/

#include "programs/program.h"
#include "files/fileInstrument.h"

/***** CLASS ***********************************/

/** @brief Compute statistics for arcs of instrument data.
* @ingroup programsGroup */
class InstrumentArcStatistics
{
public:
  void run(Config &config, Parallel::CommunicatorPtr comm);
};

GROOPS_REGISTER_PROGRAM(InstrumentArcStatistics, PARALLEL, "Compute statistics for arcs of instrument data", Instrument, TimeSeries, Statistics)

/***********************************************/

void InstrumentArcStatistics::run(Config &config, Parallel::CommunicatorPtr comm)
{
  try
  {
    FileName    fileNameInstrumentOut, fileNameInstrumentIn;
    UInt        startData, countData = MAX_UINT;
    Bool        perColumn, ignoreNan;
    std::string choice;
    std::function<double(const_MatrixSliceRef A)> func;

    readConfig(config, "outputfileStatisticsTimeSeries", fileNameInstrumentOut, Config::MUSTSET, "", "columns: mjd, statistics column(s) per instrument file");
    readConfig(config, "inputfileInstrument",            fileNameInstrumentIn,  Config::MUSTSET, "", "");
    if(readConfigChoice(config, "statistics", choice, Config::MUSTSET, "", ""))
    {
      if(readConfigChoiceElement(config, "rootMeanSquare",    choice)) func = rootMeanSquare;
      if(readConfigChoiceElement(config, "standardDeviation", choice)) func = standardDeviation;
      if(readConfigChoiceElement(config, "mean",              choice)) func = mean;
      if(readConfigChoiceElement(config, "median",            choice)) func = median;
      if(readConfigChoiceElement(config, "min",               choice)) func = static_cast<Double(*)(const_MatrixSliceRef)>(&min); // min and max are overloads, and the standard forbids automatic overload resolution in this context. So a manual cast is made in which the overload can be resolved successfully
      if(readConfigChoiceElement(config, "max",               choice)) func = static_cast<Double(*)(const_MatrixSliceRef)>(&max);
      if(readConfigChoiceElement(config, "epochCount",        choice)) func = [] (const_MatrixSliceRef A) {return static_cast<Double>(A.rows());};
      endChoice(config);
    }
    readConfig(config, "startDataFields", startData, Config::DEFAULT,  "0", "start");
    readConfig(config, "countDataFields", countData, Config::OPTIONAL, "",  "number of data fields (default: all)");
    readConfig(config, "perColumn",       perColumn, Config::DEFAULT,  "1", "compute statistic per column");
    readConfig(config, "ignoreNan",       ignoreNan, Config::DEFAULT,  "0", "ignore NaN values in input");
    if(isCreateSchema(config)) return;

    // =================================================

    logStatus<<"read instrument file <"<<fileNameInstrumentIn<<">"<<Log::endl;
    InstrumentFile instrumentFile(fileNameInstrumentIn);
    std::vector<Matrix> rows(instrumentFile.arcCount());
    Parallel::forEach(rows, [&](UInt arcNo)
    {
      const Arc    arc = instrumentFile.readArc(arcNo);
      const Matrix A   = arc.matrix();
      if(!A.size())
        return Matrix();

      // lambda function
      auto removeNan = [](const_MatrixSliceRef A)
      {
        std::vector<Double> data = flatten(A);
        data.erase(std::remove_if(data.begin(), data.end(), [](Double d) {return std::isnan(d);}), data.end());
        return Vector(data);
      };

      // compute statistics
      const UInt dataColumnCount = std::min(A.columns()-1-startData, countData);
      const UInt columnCount     = perColumn ? dataColumnCount : 1;
      Matrix row(1, 1+columnCount);
      row(0, 0) =  (0.5*(arc.front().time+arc.back().time+medianSampling(arc.times()))).mjd();
      for(UInt i=0; i<columnCount; i++)
      {
        const_MatrixSliceRef slice(A.column(1+startData+i, perColumn ? 1 : dataColumnCount));
        row(0, 1+i) = func(ignoreNan ? removeNan(slice) : slice);
      }
      return row;
    }, comm);

    if(Parallel::isMaster(comm))
    {
      // copy rows to matrix
      rows.erase(std::remove_if(rows.begin(), rows.end(), [](const Matrix &A) {return !A.size();}), rows.end());
      Matrix A(rows.size(), rows.at(0).columns());
      for(UInt i=0; i<A.rows(); i++)
        copy(rows.at(i), A.row(i));

      logStatus<<"write arc statistics to instrument file <"<<fileNameInstrumentOut<<">"<<Log::endl;
      InstrumentFile::write(fileNameInstrumentOut, Arc(A));
    }
  }
  catch(std::exception &e)
  {
    GROOPS_RETHROW(e)
  }
}

/***********************************************/