File: instrumentRemoveEpochsByCriteria.cpp

package info (click to toggle)
groops 0%2Bgit20250907%2Bds-1
  • links: PTS, VCS
  • area: non-free
  • in suites: forky, sid
  • size: 11,140 kB
  • sloc: cpp: 135,607; fortran: 1,603; makefile: 20
file content (174 lines) | stat: -rw-r--r-- 6,740 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
/***********************************************/
/**
* @file instrumentRemoveEpochsByCriteria.cpp
*
* @brief Remove epochs through evaluating expressions
*
* @author Norbert Zehentner
* @author Beate Klinger
* @author Matthias Ellmer
* @author Andreas Kvas
* @date 2015-05-19
*
*/
/***********************************************/

// Latex documentation
#define DOCSTRING docstring
static const char *docstring = R"(
This program removes epochs from \configFile{inputfileInstrument}{instrument}
by evaluating a set of \config{removalCriteria} expressions. For the data
columns the standard data variables are available,
see~\reference{dataVariables}{general.parser:dataVariables}.

The instrument data can be reduced by data from \configFile{inputfileInstrumentReference}{instrument}
prior to evaluation of the expressions.

To reduce the data by its median, use an expression like \verb|data1-data1mean|.
To remove epochs that deviate by more than 3 sigma use \verb|abs(data1)>3*data1std|
or \verb|abs(data0-data0median)>3*1.4826*data0mad|.

All arcs in the input instrument file are concatenated, meaning expressions
like \verb|data1mean| refer to the complete dataset. The removed epochs can be saved
in a separate \configFile{outputfileInstrumentRemovedEpochs}{instrument}.
)";

/***********************************************/

#include "programs/program.h"
#include "parser/dataVariables.h"
#include "files/fileInstrument.h"

/***** CLASS ***********************************/

/** @brief Remove epochs which meet one of several criteria.
* @ingroup programsGroup */
class InstrumentRemoveEpochsByCriteria
{
public:
  void run(Config &config, Parallel::CommunicatorPtr comm);
};

GROOPS_REGISTER_PROGRAM(InstrumentRemoveEpochsByCriteria, SINGLEPROCESS, "Remove epochs which criteria defined through expressions", Instrument)

/***********************************************/

void InstrumentRemoveEpochsByCriteria::run(Config &config, Parallel::CommunicatorPtr /*comm*/)
{
  try
  {
    FileName fileNameOut, fileNameRemovedEpochs;
    FileName fileNameIn,  fileNameRef;
    std::vector<ExpressionVariablePtr> expressions;
    Double   margin;

    readConfig(config, "outputfileInstrument",               fileNameOut,           Config::OPTIONAL, "", "all data is stored in one arc");
    readConfig(config, "outputfileInstrumentRemovedEpochs",  fileNameRemovedEpochs, Config::OPTIONAL, "", "all data is stored in one arc");
    readConfig(config, "inputfileInstrument",                fileNameIn,            Config::MUSTSET,  "", "arcs are concatenated for processing");
    readConfig(config, "inputfileInstrumentReference",       fileNameRef,           Config::OPTIONAL, "", "if given, the reference data is reduced prior to the expressions being evaluated");
    readConfig(config, "removalCriteria",                    expressions,           Config::MUSTSET,  "abs(data0-data0median) > 3*1.4826*data0mad",  "epochs are removed if one criterion evaluates true. data0 is the first data field.");
    readConfig(config, "margin",                             margin,                Config::DEFAULT,  "1e-5", "remove data around identified epochs (on both sides) [seconds]");
    if(isCreateSchema(config)) return;

    // ======================================================

    // read instrument data
    // --------------------
    logStatus<<"read instrument data <"<<fileNameIn<<">"<<Log::endl;
    const Arc arc = InstrumentFile::read(fileNameIn);
    Matrix data = arc.matrix();
    if(data.columns()<2)
      throw(Exception("input file <"+fileNameIn.str()+"> does not have enough columns (Found: "+data.columns()%"%i)."s));


    // reduce reference data
    // ---------------------
    if(!fileNameRef.empty())
    {
      logStatus<<"read reference data <"<<fileNameRef<<">"<<Log::endl;
      const Arc arcRef = InstrumentFile::read(fileNameRef);
      Arc::checkSynchronized({arc, arcRef});

      // Reduce reference data
      Matrix reference = arcRef.matrix();
      if(reference.columns() != data.columns())
        throw(Exception("Reference file <"+fileNameRef.str()+"> has incompatible number of data columns with data file <"+fileNameIn.str()+"> ("+ reference.columns() % "%i vs "s + data.columns() % "%i )."s));
      axpy(-1., reference.column(1, reference.columns()-1), data.column(1, data.columns()-1) );
    }
    // Remove time column
    data = data.column(1, data.columns()-1);

    // ======================================================

    // initialize data variables
    // -------------------------
    VariableList varList;
    addDataVariables(data, varList);
    for(UInt i=0; i<expressions.size(); i++)
      expressions.at(i)->simplify(varList);

    // criteria evaluation
    // -------------------
    std::vector<Time> times;
    logStatus<<"evaluate criteria"<<Log::endl;
    Single::forEach(data.rows(), [&](UInt idEpoch)
    {
      evaluateDataVariables(data, idEpoch, varList);
      for(UInt i=0; i<expressions.size(); i++)
        if(expressions.at(i)->evaluate(varList))
        {
          times.push_back(arc.at(idEpoch).time);
          break;
        }
    });

    // ======================================================

    // remove epochs within buffer
    // ---------------------------
    Arc arcNew, arcEpochsForRemoval;
    if(times.size())
    {
      logStatus<<"remove epochs (+/- "<<margin<<" sec) from instrument data"<<Log::endl;
      UInt idxTime = 0;
      Single::forEach(arc.size(), [&](UInt i)
      {
        while((idxTime < times.size()) && ((times.at(idxTime)-arc.at(i).time).seconds() < -margin))
          idxTime++;
        if((idxTime >= times.size()) || ((times.at(idxTime)-arc.at(i).time).seconds() > +margin))
          arcNew.push_back(arc.at(i));
        else
          arcEpochsForRemoval.push_back(arc.at(i));
      });
    }
    else
    {
      arcNew = arc;
    }

    logInfo<<"  "<<arcEpochsForRemoval.size()<<" epochs meet criteria"<<Log::endl;
    logInfo<<"  "<<arc.size()-arcNew.size()<<" epochs removed"<<Log::endl;

    // ======================================================

    // save file
    // ---------
    if((!fileNameOut.empty()) && ((times.size()!=0) || (fileNameIn.str() != fileNameOut.str())))
    {
      logStatus<<"write instrument data to file <"<<fileNameOut<<">"<<Log::endl;
      InstrumentFile::write(fileNameOut, arcNew);
      Arc::printStatistics(arcNew);
    }
    if(!fileNameRemovedEpochs.empty())
    {
      logStatus<<"write removed epochs to file <"<<fileNameRemovedEpochs<<">"<<Log::endl;
      InstrumentFile::write(fileNameRemovedEpochs, arcEpochsForRemoval);
    }
  }
  catch(std::exception &e)
  {
    GROOPS_RETHROW(e)
  }
}

/***********************************************/