File: instrumentConcatenate.cpp

package info (click to toggle)
groops 0%2Bgit20250907%2Bds-1
  • links: PTS, VCS
  • area: non-free
  • in suites: forky, sid
  • size: 11,140 kB
  • sloc: cpp: 135,607; fortran: 1,603; makefile: 20
file content (138 lines) | stat: -rw-r--r-- 4,815 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
/***********************************************/
/**
* @file instrumentConcatenate.cpp
*
* @brief Concatenate arcs from several files.
*
* @author Torsten Mayer-Guerr
* @author Norbert Zehentner
* @date 2001-06-08
*/
/***********************************************/

// LaTeX documentation of this program, kept as a raw string literal.
// The DOCSTRING macro names the variable holding the text.
// NOTE(review): presumably the registration macro further down in this file
// picks the text up through DOCSTRING -- confirm in programs/program.h.
#define DOCSTRING docstring
static const char *docstring = R"(
This program concatenate the arcs from several \file{instrument files}{instrument}
and write it to a new \file{file}{instrument}. Input files must be of the same type.
The arcs are merged to one arc even though there is a gap inbetween.
To split the data into arcs use \program{InstrumentSynchronize}.
Three options are available: \config{sort}, \config{removeDuplicates} and \config{checkForNaNs}.
If \config{sort} is enabled, the program reads all files, no matter if they are sorted correctly in time, and
then sorts the epochs. If \config{removeDuplicates} is enabled, the program checks the whole data set
for epochs that are contained twice. And if \config{checkForNaNs} is enabled the data set is checked for
invalid epochs containing NaNs.
)";

/***********************************************/

#include "programs/program.h"
#include "files/fileInstrument.h"

/***** CLASS ***********************************/

/** @brief Concatenate arcs from several files.
* The class has no data members; all work is done in run().
* @ingroup programsGroup */
class InstrumentConcatenate
{
public:
  /** @brief Read the configuration, merge all input files into one arc and write it to the output file.
  * @param config program configuration (elements: outputfile, inputfile, sort, removeDuplicates, checkForNaNs).
  * @param comm   communicator; the implementation in this file does not use it. */
  void run(Config &config, Parallel::CommunicatorPtr comm);
};

// Register the program class together with the SINGLEPROCESS flag, a one-line description and the tag Instrument.
// NOTE(review): the exact meaning of each macro argument is defined outside this file
// (presumably programs/program.h) -- confirm there.
GROOPS_REGISTER_PROGRAM(InstrumentConcatenate, SINGLEPROCESS, "concatenate arcs from several files", Instrument)
// Presumably declares ArcConcatenate as the former name of InstrumentConcatenate, renamed on 2020-05-25 -- confirm.
// Note: 05 is an octal integer literal; its value is 5, so the date is as intended.
GROOPS_RENAMED_PROGRAM(ArcConcatenate, InstrumentConcatenate, date2time(2020, 05, 25))

/***********************************************/

/**
* @brief Merge the epochs of all input instrument files into a single arc and write it to one file.
*
* Steps, in this order: read the configuration, append every readable input file to one arc,
* optionally sort, optionally remove duplicate epochs, optionally remove epochs whose data sum is NaN,
* then write the arc and print its statistics.
*
* @param config program configuration (elements: outputfile, inputfile, sort, removeDuplicates, checkForNaNs).
* @param comm   unused.
*
* A failure while reading or appending a single input file is only logged as a warning and the
* file is skipped; every other std::exception is passed on through GROOPS_RETHROW.
*/
void InstrumentConcatenate::run(Config &config, Parallel::CommunicatorPtr /*comm*/)
{
  try
  {
    FileName              fileNameOut;
    std::vector<FileName> fileNamesIn;
    Bool                  sortEpochs;
    Bool                  removeNaNs;
    std::string           duplicatePolicy;
    Double                timeMargin; // only read from the config, and only used, when a duplicate policy is selected

    // The order of the readConfig calls is kept exactly as it defines the order of the config elements.
    readConfig(config, "outputfile", fileNameOut, Config::MUSTSET, "",  "");
    readConfig(config, "inputfile",  fileNamesIn, Config::MUSTSET, "",  "");
    readConfig(config, "sort",       sortEpochs,  Config::DEFAULT, "0", "sort epochs with increasing time");
    if(readConfigChoice(config, "removeDuplicates", duplicatePolicy, Config::OPTIONAL, "", "remove duplicate epochs"))
    {
      // both choices offer the same margin element
      if(readConfigChoiceElement(config, "keepFirst", duplicatePolicy, "keep first epoch with the same time stamp, remove all others"))
        readConfig(config, "margin", timeMargin, Config::DEFAULT, "1e-5", "margin for identical times [seconds]");
      if(readConfigChoiceElement(config, "keepLast",  duplicatePolicy, "keep last epoch with the same time stamp, remove all others"))
        readConfig(config, "margin", timeMargin, Config::DEFAULT, "1e-5", "margin for identical times [seconds]");
      endChoice(config);
    }
    readConfig(config, "checkForNaNs", removeNaNs, Config::DEFAULT, "0", "remove epochs with NaN values in one of the data fields");
    if(isCreateSchema(config))
      return;

    const Bool keepFirst        = (duplicatePolicy == "keepFirst");
    const Bool keepLast         = (duplicatePolicy == "keepLast");
    const Bool removeDuplicates = keepFirst || keepLast;

    // read data: everything is appended to one single arc
    // ---------------------------------------------------
    Arc arc;
    for(const FileName &fileName : fileNamesIn)
    {
      try
      {
        logStatus<<"read instrument file <"<<fileName<<">"<<Log::endl;
        arc.append(InstrumentFile::read(fileName));
      }
      catch(const std::exception &e)
      {
        // best effort: report the problem and go on with the next file
        logWarning<<e.what()<<" continue..."<<Log::endl;
      }
    }

    // sort data (also done whenever duplicates are to be removed)
    // ------------------------------------------------------------
    if(sortEpochs || removeDuplicates)
    {
      logStatus<<"sort epochs"<<Log::endl;
      arc.sort();
    }

    // eliminate duplicates
    // --------------------
    if(removeDuplicates)
    {
      logStatus<<"eliminate duplicates"<<Log::endl;
      const UInt countBefore = arc.size();
      arc.removeDuplicateEpochs(keepFirst, timeMargin);
      const UInt countRemoved = countBefore-arc.size();
      logInfo<<" "<<countRemoved<<" duplicates removed!"<<Log::endl;
    }

    // eliminate NaNs
    // --------------
    if(removeNaNs)
    {
      logStatus<<"search for NaNs"<<Log::endl;
      UInt countRemoved = 0;
      Log::Timer timer(arc.size());
      UInt idEpoch = 0;
      while(idEpoch < arc.size())
      {
        timer.loopStep(idEpoch);
        const Bool hasNaN = std::isnan(sum(arc.at(idEpoch).data()));
        if(hasNaN)
        {
          // the following epoch moves to this index, so the index is not advanced
          arc.remove(idEpoch);
          countRemoved++;
        }
        else
          idEpoch++;
      }
      timer.loopEnd();
      logInfo<<" "<<countRemoved<<" epochs with NaN values removed!"<<Log::endl;
    }

    // save
    // ----
    logStatus<<"write instrument file <"<<fileNameOut<<">"<<Log::endl;
    InstrumentFile::write(fileNameOut, arc);
    Arc::printStatistics(arc);
  }
  catch(std::exception &e)
  {
    GROOPS_RETHROW(e)
  }
}

/***********************************************/