File: TrimmedMeanVisitor.hpp

package info (click to toggle)
bedops 2.4.41%2Bdfsg-1
  • links: PTS, VCS
  • area: main
  • in suites: bookworm
  • size: 21,148 kB
  • sloc: ansic: 28,562; cpp: 15,359; sh: 2,704; makefile: 2,687; xml: 1,669; python: 1,582; csh: 823; perl: 365; java: 172
file content (225 lines) | stat: -rw-r--r-- 8,184 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
/*
  Author: Shane Neph & Scott Kuehn
  Date:   Mon Aug 20 14:22:38 PDT 2007
*/
//
//    BEDOPS
//    Copyright (C) 2011-2022 Shane Neph, Scott Kuehn and Alex Reynolds
//
//    This program is free software; you can redistribute it and/or modify
//    it under the terms of the GNU General Public License as published by
//    the Free Software Foundation; either version 2 of the License, or
//    (at your option) any later version.
//
//    This program is distributed in the hope that it will be useful,
//    but WITHOUT ANY WARRANTY; without even the implied warranty of
//    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
//    GNU General Public License for more details.
//
//    You should have received a copy of the GNU General Public License along
//    with this program; if not, write to the Free Software Foundation, Inc.,
//    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
//

#ifndef TMEANS_VISITOR_HPP
#define TMEANS_VISITOR_HPP

#include <cmath>
#include <cstdlib>
#include <limits>
#include <set>
#include <string>

#include "data/measurement/NaN.hpp"
#include "data/measurement/SelectMeasureType.hpp"
#include "utility/Assertion.hpp"
#include "utility/Exception.hpp"
#include "utility/OrderCompare.hpp"

namespace Visitors {

  template <
            typename Process,
            typename BaseVisitor,
            typename ExceptionType = Ext::ArgumentError
           >
  struct TrimmedMean : BaseVisitor {

    typedef BaseVisitor BaseClass;
    typedef Process ProcessType;
    typedef typename BaseClass::RefType RefType;
    typedef typename BaseClass::MapType MapType;
    typedef MapType* PtrType;
    typedef Ordering::CompValueThenAddressLesser<MapType, MapType> Comp;
    typedef std::set<PtrType, Comp> ScoreTypeContainer;
    typedef typename Signal::SelectMeasure<MapType>::MeasureType MT;

    //==============
    // Construction
    //==============
    explicit TrimmedMean(double lowerKth=0.2, double upperKth = 0.8, const ProcessType& pt = ProcessType())
        : lowerKth_(lowerKth), upperKth_(upperKth), lowerSum_(0), upperSum_(0), currentAtPosLower_(0),
          currentAtPosUpper_(0), doKth_(false), symmetric_(false), pt_(pt) {
      currentMarkerLower_ = scoresBuf_.end(), currentMarkerUpper_ = scoresBuf_.end();
      Ext::Assert<ExceptionType>(lowerKth_ >= 0 && lowerKth_ <= 1, "Expect 0 <= lowerKth <= 1");
      Ext::Assert<ExceptionType>(upperKth_ >= 0 && upperKth_ <= 1, "Expect 0 <= upperKth <= 1");
      const double epsilon = std::numeric_limits<double>::epsilon();
      if ( std::abs(1.0-lowerKth-upperKth) <= epsilon )
        doKth_ = true;

      if ( std::abs(lowerKth_ - upperKth_) <= epsilon ) // symmetric: trim same # elements from both ends
        symmetric_ = true;
      Ext::Assert<ExceptionType>(lowerKth + upperKth <= 1+epsilon, "Expect lowerKth + upperKth <= 1");
    }

    //======================================
    // Windowing Phase : Add() and Delete()
    //======================================
    inline void Add(PtrType ptr) {
      scoresBuf_.insert(ptr);
      if ( lowerKth_ > 0 && !doKth_ )
        add(ptr, currentMarkerLower_, currentAtPosLower_, lowerSum_);
      add(ptr, currentMarkerUpper_, currentAtPosUpper_, upperSum_);
    }

    inline void Delete(PtrType toRemove) {
      // keep in mind that you cannot be here if there is <= 1 element
      //  in the scoresBuf_ containers.
      if ( lowerKth_ > 0 && !doKth_ )
        remove(toRemove, currentMarkerLower_, currentAtPosLower_, lowerSum_);
      remove(toRemove, currentMarkerUpper_, currentAtPosUpper_, upperSum_);
      scoresBuf_.erase(toRemove);
    }

    //====================================
    // Repositioning and Reporting Phases
    //====================================
    inline void DoneReference() {
      // currentMarkerLower_ marks the last element in the subsequence to be ignored
      //       (not one passed that, but on that)
      // currentMarkerUpper_ marks the last element to be included in the output
      //       (not one passed that, but on that)
      // output mean of sequence (..] formed by currentMarkerLower_ to currentMarkerUpper_
      //
      // if doKth_ is true, then we are just reporting the element pointed to by currentMarkerUpper_
      //   following its update.  This is really a kth usage.  Bob T. found certain math packages
      //   work this way, where, for example, tmean is called with 0.3 and 0.7.  The lower and upper
      //   markers end up pointing to the same element.  That element is not part of the sequence of
      //   interest for the lower marker, but is part of the sequence of interest for the upper marker.
      //   So, another way to get the kth element.

      // guaranteed: 0 <= kth-value <= 1
      if ( scoresBuf_.empty() ) {
        static const Signal::NaN nan = Signal::NaN();
        pt_.operator()(nan);
        return;
      }

      std::size_t size = scoresBuf_.size();
      std::size_t kthPosLow = static_cast<std::size_t>(iround(lowerKth_ * size));
      std::size_t kthPosHigh = static_cast<std::size_t>(iround(upperKth_ * size));
      kthPosHigh = size - kthPosHigh;
      if ( symmetric_ ) {
        kthPosLow = std::max(kthPosLow, size - kthPosHigh);
        kthPosHigh = size - kthPosLow;
      }
      bool doLow = kthPosLow > 0;

      if ( doLow )
        --kthPosLow; // make zero-based
      if ( kthPosHigh > 0 )
        --kthPosHigh; // make zero-based

      if ( !doKth_ && doLow )
        doneRef(currentMarkerLower_, currentAtPosLower_, lowerSum_, kthPosLow);
      doneRef(currentMarkerUpper_, currentAtPosUpper_, upperSum_, kthPosHigh);

      // Spit results
      if ( doKth_ || currentAtPosUpper_ == currentAtPosLower_ )
        pt_.operator()(*currentMarkerUpper_); // single element related to kth - Bob T. thinks this is best
      else if ( doLow )
        pt_.operator()((upperSum_ - lowerSum_)/(currentAtPosUpper_ - currentAtPosLower_));
       else
         pt_.operator()(upperSum_ / (currentAtPosUpper_ + 1));
    }

    //===========
    // Cleanup()
    //===========
    virtual ~TrimmedMean()
      { /* */ }


  protected:
    inline void add(PtrType ptr, typename ScoreTypeContainer::iterator& marker, std::size_t& pos, MT& sum) {
      static Comp comp;
      if ( marker == scoresBuf_.end() ) {
        marker = scoresBuf_.begin();
        pos = 0;
        sum = *ptr;
      } else if ( comp(ptr, *marker) ) { // ptr < marker
        ++pos;
        sum += *ptr;
      }
    }

    inline void remove(PtrType ptr, typename ScoreTypeContainer::iterator& marker, std::size_t& pos, MT& sum) {
      // keep in mind that you cannot be here if there is <= 1 element
      //  in the scoresBuf_ containers.
      static Comp comp;
      if ( comp(ptr, *marker) ) { // toRemove < marker
        --pos;
        sum -= *ptr;
      }
      else if ( ptr == *marker ) { // removing marker
        sum -= *ptr;
        if ( marker != scoresBuf_.begin() ) {
          --marker;
          --pos;
        } else {
          if ( ++marker != scoresBuf_.end() )
            sum += **marker;
          // pos remains the same
        }
      }
    }

    inline void doneRef(typename ScoreTypeContainer::iterator& marker, std::size_t& pos, MT& sum, std::size_t newPos) {

      // Increment markers as needed
      while ( newPos > pos ) {
        sum += **++marker;
        ++pos;
      } // while

      // Decrement markers as needed
      while ( newPos < pos ) {
        sum -= **marker--;
        --pos;
      } // while
    }

  protected:
    const double lowerKth_;
    const double upperKth_;
    MT lowerSum_, upperSum_;
    std::size_t currentAtPosLower_, currentAtPosUpper_;
    bool doKth_, symmetric_;
    ProcessType pt_;
    ScoreTypeContainer scoresBuf_;
    typename ScoreTypeContainer::iterator currentMarkerLower_, currentMarkerUpper_;

  private:
    inline double iround(double d) {
      double d1 = std::ceil(d);
      return( (d >= 0) ?
              ((d1-d > 0.5) ? std::floor(d) : d1)
                       :
              ((d1-d >= 0.5) ? std::floor(d) : d1)
            );
    }
  };

} // namespace Visitors

#endif // TMEANS_VISITOR_HPP