File: instrumentInsertNAN.cpp

package info (click to toggle)
groops 0%2Bgit20250907%2Bds-1
  • links: PTS, VCS
  • area: non-free
  • in suites: forky, sid
  • size: 11,140 kB
  • sloc: cpp: 135,607; fortran: 1,603; makefile: 20
file content (156 lines) | stat: -rw-r--r-- 4,868 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
/***********************************************/
/**
* @file instrumentInsertNAN.cpp
*
* @brief insert epochs with data NAN into instrument files
*
* @author Matthias Ellmer
* @date 2017-11-28
*
*/
/***********************************************/

// Latex documentation
#define DOCSTRING docstring
static const char *docstring = R"(
This program inserts NAN epochs into \configFile{inputfileInstrument}{instrument} files,
either at specific \configClass{times}{timeSeriesType} or where gaps in the instrument are detected.
)";

/***********************************************/

#include "programs/program.h"
#include "classes/timeSeries/timeSeries.h"
#include "files/fileInstrument.h"

/***** CLASS ***********************************/

/** @brief Insert epochs with data NAN into instrument files
* @ingroup programsGroup */
class InstrumentInsertNAN
{
public:
  void run(Config &config, Parallel::CommunicatorPtr comm);
};

GROOPS_REGISTER_PROGRAM(InstrumentInsertNAN, PARALLEL, "Insert epochs with data NAN into instrument files", Instrument, Plot)

/***********************************************/

void InstrumentInsertNAN::run(Config &config, Parallel::CommunicatorPtr comm)
{
  try
  {
    FileName      inputfileInstrument, outputfileInstrument;
    TimeSeriesPtr timeSeries;
    Bool          atGaps;
    Bool          atArcEnds;

    readConfig(config, "outputfileInstrument", outputfileInstrument, Config::MUSTSET, "",  "");
    readConfig(config, "inputfileInstrument",  inputfileInstrument,  Config::MUSTSET, "",  "");
    readConfig(config, "times",                timeSeries,           Config::DEFAULT, "",  "Insert NAN at specific times.");
    readConfig(config, "atGaps",               atGaps,               Config::DEFAULT, "0", "Insert NAN where epochs are more than 1.5 times the median sampling apart.");
    readConfig(config, "atArcEnds",            atArcEnds,            Config::DEFAULT, "0", "Insert one epoch with data NAN at arc ends");
    if(isCreateSchema(config)) return;

    const auto nanTimes = timeSeries->times();

    InstrumentFile inFile(inputfileInstrument);
    const UInt arcCount = inFile.arcCount();

    std::vector<Arc> arcList(arcCount);
    UInt total = 0;
    Parallel::forEach(arcList, [&](UInt arcNo)
    {
      Arc arc = inFile.readArc(arcNo);
      if(!arc.size())
        return arc;

      Epoch *epoch = Epoch::create(arc.getType());
      epoch->setData(arc.at(0).data() * NAN_EXPR);

      const auto times = arc.times();
      const Time epsilonTime = Time(0,std::numeric_limits<Double>::epsilon());

      // ----------

      if(nanTimes.size())
      {
        // First value in nanTimes larger than times.begin()
        UInt nanIndex = std::distance(nanTimes.begin(), std::upper_bound(nanTimes.begin(), nanTimes.end(), times.front()));
        auto it = times.begin();

        std::vector<UInt> indicesToInsert;
        for(; nanIndex<nanTimes.size(); nanIndex++)
        {
          // Find the position of the nan epoch in this arc
          it = std::lower_bound(it, times.end(), nanTimes.at(nanIndex));
          if(it == times.end())
            break;

          indicesToInsert.push_back(std::distance(times.begin(), it));
        }

        // Insert starting from back to not screw up the indices.
        std::reverse(indicesToInsert.begin(), indicesToInsert.end());
        for(const UInt i : indicesToInsert)
        {
          epoch->time = times.at(i-1) + epsilonTime;
          arc.insert(i, *epoch);
          total++;
        }
      }

      // ----------

      if(atGaps && times.size() > 2)
      {
        std::vector<Time> diff(times.size());
        std::adjacent_difference(times.begin(), times.end(), diff.begin());

        std::vector<UInt> indicesToInsert;
        const Double minGap = 1.5 * medianSampling(times).seconds();
        for(UInt i=1; i<diff.size(); i++)
        {
          if(diff.at(i).seconds() > minGap)
            indicesToInsert.push_back(i);
        }

        std::reverse(indicesToInsert.begin(), indicesToInsert.end());
        for(const UInt i : indicesToInsert)
        {
          epoch->time = times.at(i-1) + epsilonTime;
          arc.insert(i, *epoch);
          total++;
        }
      }

      // ----------

      if(atArcEnds)
      {
        epoch->time = times.back() + epsilonTime;
        arc.push_back(*epoch);
        total++;
      }

      return arc;
    }, comm);

    Parallel::reduceSum(total, 0, comm);
    logStatus<<"  inserted "<<total<<" NaNs"<<Log::endl;

    if(Parallel::isMaster(comm))
    {
      logStatus<<"write instrument to file <"<<outputfileInstrument<<">"<<Log::endl;
      InstrumentFile::write(outputfileInstrument, arcList);
    }
  }
  catch(std::exception &e)
  {
    GROOPS_RETHROW(e)
  }
}

/***********************************************/
/***********************************************/