1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174
|
/***********************************************/
/**
* @file instrumentRemoveEpochsByCriteria.cpp
*
* @brief Remove epochs through evaluating expressions
*
* @author Norbert Zehentner
* @author Beate Klinger
* @author Matthias Ellmer
* @author Andreas Kvas
* @date 2015-05-19
*
*/
/***********************************************/
// Latex documentation
#define DOCSTRING docstring
static const char *docstring = R"(
This program removes epochs from \configFile{inputfileInstrument}{instrument}
by evaluating a set of \config{removalCriteria} expressions. For the data
columns the standard data variables are available,
see~\reference{dataVariables}{general.parser:dataVariables}.
The instrument data can be reduced by data from \configFile{inputfileInstrumentReference}{instrument}
prior to evaluation of the expressions.
To reduce the data by its median, use an expression like \verb|data1-data1mean|.
To remove epochs that deviate by more than 3 sigma use \verb|abs(data1)>3*data1std|
or \verb|abs(data0-data0median)>3*1.4826*data0mad|.
All arcs in the input instrument file are concatenated, meaning expressions
like \verb|data1mean| refer to the complete dataset. The removed epochs can be saved
in a separate \configFile{outputfileInstrumentRemovedEpochs}{instrument}.
)";
/***********************************************/
#include "programs/program.h"
#include "parser/dataVariables.h"
#include "files/fileInstrument.h"
/***** CLASS ***********************************/
/** @brief Remove epochs which meet one of several criteria.
* @ingroup programsGroup */
class InstrumentRemoveEpochsByCriteria
{
public:
void run(Config &config, Parallel::CommunicatorPtr comm);
};
GROOPS_REGISTER_PROGRAM(InstrumentRemoveEpochsByCriteria, SINGLEPROCESS, "Remove epochs which criteria defined through expressions", Instrument)
/***********************************************/
void InstrumentRemoveEpochsByCriteria::run(Config &config, Parallel::CommunicatorPtr /*comm*/)
{
try
{
FileName fileNameOut, fileNameRemovedEpochs;
FileName fileNameIn, fileNameRef;
std::vector<ExpressionVariablePtr> expressions;
Double margin;
readConfig(config, "outputfileInstrument", fileNameOut, Config::OPTIONAL, "", "all data is stored in one arc");
readConfig(config, "outputfileInstrumentRemovedEpochs", fileNameRemovedEpochs, Config::OPTIONAL, "", "all data is stored in one arc");
readConfig(config, "inputfileInstrument", fileNameIn, Config::MUSTSET, "", "arcs are concatenated for processing");
readConfig(config, "inputfileInstrumentReference", fileNameRef, Config::OPTIONAL, "", "if given, the reference data is reduced prior to the expressions being evaluated");
readConfig(config, "removalCriteria", expressions, Config::MUSTSET, "abs(data0-data0median) > 3*1.4826*data0mad", "epochs are removed if one criterion evaluates true. data0 is the first data field.");
readConfig(config, "margin", margin, Config::DEFAULT, "1e-5", "remove data around identified epochs (on both sides) [seconds]");
if(isCreateSchema(config)) return;
// ======================================================
// read instrument data
// --------------------
logStatus<<"read instrument data <"<<fileNameIn<<">"<<Log::endl;
const Arc arc = InstrumentFile::read(fileNameIn);
Matrix data = arc.matrix();
if(data.columns()<2)
throw(Exception("input file <"+fileNameIn.str()+"> does not have enough columns (Found: "+data.columns()%"%i)."s));
// reduce reference data
// ---------------------
if(!fileNameRef.empty())
{
logStatus<<"read reference data <"<<fileNameRef<<">"<<Log::endl;
const Arc arcRef = InstrumentFile::read(fileNameRef);
Arc::checkSynchronized({arc, arcRef});
// Reduce reference data
Matrix reference = arcRef.matrix();
if(reference.columns() != data.columns())
throw(Exception("Reference file <"+fileNameRef.str()+"> has incompatible number of data columns with data file <"+fileNameIn.str()+"> ("+ reference.columns() % "%i vs "s + data.columns() % "%i )."s));
axpy(-1., reference.column(1, reference.columns()-1), data.column(1, data.columns()-1) );
}
// Remove time column
data = data.column(1, data.columns()-1);
// ======================================================
// initialize data variables
// -------------------------
VariableList varList;
addDataVariables(data, varList);
for(UInt i=0; i<expressions.size(); i++)
expressions.at(i)->simplify(varList);
// criteria evaluation
// -------------------
std::vector<Time> times;
logStatus<<"evaluate criteria"<<Log::endl;
Single::forEach(data.rows(), [&](UInt idEpoch)
{
evaluateDataVariables(data, idEpoch, varList);
for(UInt i=0; i<expressions.size(); i++)
if(expressions.at(i)->evaluate(varList))
{
times.push_back(arc.at(idEpoch).time);
break;
}
});
// ======================================================
// remove epochs within buffer
// ---------------------------
Arc arcNew, arcEpochsForRemoval;
if(times.size())
{
logStatus<<"remove epochs (+/- "<<margin<<" sec) from instrument data"<<Log::endl;
UInt idxTime = 0;
Single::forEach(arc.size(), [&](UInt i)
{
while((idxTime < times.size()) && ((times.at(idxTime)-arc.at(i).time).seconds() < -margin))
idxTime++;
if((idxTime >= times.size()) || ((times.at(idxTime)-arc.at(i).time).seconds() > +margin))
arcNew.push_back(arc.at(i));
else
arcEpochsForRemoval.push_back(arc.at(i));
});
}
else
{
arcNew = arc;
}
logInfo<<" "<<arcEpochsForRemoval.size()<<" epochs meet criteria"<<Log::endl;
logInfo<<" "<<arc.size()-arcNew.size()<<" epochs removed"<<Log::endl;
// ======================================================
// save file
// ---------
if((!fileNameOut.empty()) && ((times.size()!=0) || (fileNameIn.str() != fileNameOut.str())))
{
logStatus<<"write instrument data to file <"<<fileNameOut<<">"<<Log::endl;
InstrumentFile::write(fileNameOut, arcNew);
Arc::printStatistics(arcNew);
}
if(!fileNameRemovedEpochs.empty())
{
logStatus<<"write removed epochs to file <"<<fileNameRemovedEpochs<<">"<<Log::endl;
InstrumentFile::write(fileNameRemovedEpochs, arcEpochsForRemoval);
}
}
catch(std::exception &e)
{
GROOPS_RETHROW(e)
}
}
/***********************************************/
|