1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127
|
/***********************************************/
/**
* @file instrumentArcStatistics.cpp
*
* @brief Compute statistics for arcs of instrument data.
*
* @author Sebastian Strasser
* @author Matthias Ellmer
* @author Torsten Mayer-Guerr
* @date 2016-07-18
*/
/***********************************************/
// Latex documentation
#define DOCSTRING docstring
static const char *docstring = R"(
Computes statistics of selected data columns of \configFile{inputfileInstrument}{instrument} arc wise.
The \configFile{outputfileStatisticsTimeSeries}{instrument} contains for every arc one (mid) epoch
with statistics column(s). Possible statistics are root mean square, standard deviation,
mean, median, min, and max.
With \config{perColumn} separate statistics for each selected data column are computed,
otherwise an overall value is computed.
See also \program{InstrumentArcCrossStatistics}, \program{InstrumentStatisticsTimeSeries}.
)";
/***********************************************/
#include "programs/program.h"
#include "files/fileInstrument.h"
/***** CLASS ***********************************/
/** @brief Compute statistics for arcs of instrument data.
* @ingroup programsGroup */
class InstrumentArcStatistics
{
public:
void run(Config &config, Parallel::CommunicatorPtr comm);
};
GROOPS_REGISTER_PROGRAM(InstrumentArcStatistics, PARALLEL, "Compute statistics for arcs of instrument data", Instrument, TimeSeries, Statistics)
/***********************************************/
void InstrumentArcStatistics::run(Config &config, Parallel::CommunicatorPtr comm)
{
try
{
FileName fileNameInstrumentOut, fileNameInstrumentIn;
UInt startData, countData = MAX_UINT;
Bool perColumn, ignoreNan;
std::string choice;
std::function<double(const_MatrixSliceRef A)> func;
readConfig(config, "outputfileStatisticsTimeSeries", fileNameInstrumentOut, Config::MUSTSET, "", "columns: mjd, statistics column(s) per instrument file");
readConfig(config, "inputfileInstrument", fileNameInstrumentIn, Config::MUSTSET, "", "");
if(readConfigChoice(config, "statistics", choice, Config::MUSTSET, "", ""))
{
if(readConfigChoiceElement(config, "rootMeanSquare", choice)) func = rootMeanSquare;
if(readConfigChoiceElement(config, "standardDeviation", choice)) func = standardDeviation;
if(readConfigChoiceElement(config, "mean", choice)) func = mean;
if(readConfigChoiceElement(config, "median", choice)) func = median;
if(readConfigChoiceElement(config, "min", choice)) func = static_cast<Double(*)(const_MatrixSliceRef)>(&min); // min and max are overloads, and the standard forbids automatic overload resolution in this context. So a manual cast is made in which the overload can be resolved successfully
if(readConfigChoiceElement(config, "max", choice)) func = static_cast<Double(*)(const_MatrixSliceRef)>(&max);
if(readConfigChoiceElement(config, "epochCount", choice)) func = [] (const_MatrixSliceRef A) {return static_cast<Double>(A.rows());};
endChoice(config);
}
readConfig(config, "startDataFields", startData, Config::DEFAULT, "0", "start");
readConfig(config, "countDataFields", countData, Config::OPTIONAL, "", "number of data fields (default: all)");
readConfig(config, "perColumn", perColumn, Config::DEFAULT, "1", "compute statistic per column");
readConfig(config, "ignoreNan", ignoreNan, Config::DEFAULT, "0", "ignore NaN values in input");
if(isCreateSchema(config)) return;
// =================================================
logStatus<<"read instrument file <"<<fileNameInstrumentIn<<">"<<Log::endl;
InstrumentFile instrumentFile(fileNameInstrumentIn);
std::vector<Matrix> rows(instrumentFile.arcCount());
Parallel::forEach(rows, [&](UInt arcNo)
{
const Arc arc = instrumentFile.readArc(arcNo);
const Matrix A = arc.matrix();
if(!A.size())
return Matrix();
// lambda function
auto removeNan = [](const_MatrixSliceRef A)
{
std::vector<Double> data = flatten(A);
data.erase(std::remove_if(data.begin(), data.end(), [](Double d) {return std::isnan(d);}), data.end());
return Vector(data);
};
// compute statistics
const UInt dataColumnCount = std::min(A.columns()-1-startData, countData);
const UInt columnCount = perColumn ? dataColumnCount : 1;
Matrix row(1, 1+columnCount);
row(0, 0) = (0.5*(arc.front().time+arc.back().time+medianSampling(arc.times()))).mjd();
for(UInt i=0; i<columnCount; i++)
{
const_MatrixSliceRef slice(A.column(1+startData+i, perColumn ? 1 : dataColumnCount));
row(0, 1+i) = func(ignoreNan ? removeNan(slice) : slice);
}
return row;
}, comm);
if(Parallel::isMaster(comm))
{
// copy rows to matrix
rows.erase(std::remove_if(rows.begin(), rows.end(), [](const Matrix &A) {return !A.size();}), rows.end());
Matrix A(rows.size(), rows.at(0).columns());
for(UInt i=0; i<A.rows(); i++)
copy(rows.at(i), A.row(i));
logStatus<<"write arc statistics to instrument file <"<<fileNameInstrumentOut<<">"<<Log::endl;
InstrumentFile::write(fileNameInstrumentOut, Arc(A));
}
}
catch(std::exception &e)
{
GROOPS_RETHROW(e)
}
}
/***********************************************/
|