File: SufficientStatisticsQueue.hpp

package info (click to toggle)
salmon 1.10.3%2Bds1-3
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 35,148 kB
  • sloc: cpp: 200,707; ansic: 171,082; sh: 859; python: 792; makefile: 238
file content (110 lines) | stat: -rw-r--r-- 3,491 bytes parent folder | download | duplicates (5)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
#ifndef __SUFFICIENT_STATISTICS_QUEUE_HPP__
#define __SUFFICIENT_STATISTICS_QUEUE_HPP__

#include <cmath>
#include <mutex>
#include <queue>
#include <tuple>

#include "btree_map.h"

#include "SalmonMath.hpp"

class SufficientStatisticsQueue {

  using TranscriptID = uint32_t;

public:
  SufficientStatisticsQueue(uint32_t qlen = 10) : ct_(0), qlen_(qlen) {
    if (qlen_ > 0) {
      logL_ = std::log(static_cast<double>(qlen_));
    } else {
      std::cerr
          << "ERROR: cannot instantiate sufficient statistic queue of length 0";
    }
  }

  bool approxEq(double a, double b) { return std::abs(a - b) < 1e-3; }

  btree::btree_map<TranscriptID, std::tuple<double, size_t>>
  getSmoothedStats(btree::btree_map<TranscriptID, TranscriptAlignments>& Si) {
    using salmon::math::logAdd;
    using salmon::math::logSub;

    std::lock_guard<std::mutex> lg(ssmut_);

    ++ct_;
    if (ct_ % 200 == 0) {
      std::cerr << "\n\n sstatQueue_.size() = " << sstatQueue_.size()
                << ", suf stat map size = " << smoothedSufficientStats_.size()
                << "\n\n\n";
    }

    sstatQueue_.emplace();
    auto& currentSStat = sstatQueue_.back();

    // S_i^L = S_{i-1}^{L} + (S_{i} / L)
    for (auto kv = Si.begin(); kv != Si.end(); ++kv) {
      auto transcriptID = kv->first;
      auto& hits = kv->second;
      double contrib = hits.totalProb - logL_;
      currentSStat[transcriptID] = contrib;
      auto it = smoothedSufficientStats_.find(transcriptID);
      if (it == smoothedSufficientStats_.end()) {
        smoothedSufficientStats_[transcriptID] = std::make_tuple(contrib, 1);
      } else {
        std::get<0>(it->second) = logAdd(std::get<0>(it->second), contrib);
        std::get<1>(it->second) += 1;
      }
    }

    if (sstatQueue_.size() > qlen_) {
      // S_i^L -= (S_{i-L} / L)
      auto& Sil = sstatQueue_.front();
      for (auto kv = Sil.begin(); kv != Sil.end(); ++kv) {
        auto transcriptID = kv->first;
        auto& lprob = kv->second;
        auto it = smoothedSufficientStats_.find(transcriptID);
        if (it == smoothedSufficientStats_.end()) {
          // std::cerr << "\n\nWHAT\n\n\n";
        } else if (std::get<0>(it->second) <= lprob or
                   approxEq(
                       lprob,
                       std::get<0>(
                           it->second))) { // std::get<1>(it->second) == 1) {
          if (!approxEq(std::get<0>(it->second), lprob) and
              std::get<1>(it->second) > 1) {
            std::cerr << "hrmmmm .... remaining mass = "
                      << std::get<0>(it->second) << " < lprob = " << lprob
                      << "\n";
          }
          smoothedSufficientStats_.erase(it);
        } else {
          double before = std::get<0>(it->second);
          double update = logSub(std::get<0>(it->second), lprob);
          std::get<0>(it->second) = update;
          std::get<1>(it->second) -= 1;
          if (std::get<0>(it->second) == salmon::math::LOG_0) {
            smoothedSufficientStats_.erase(it);
          }
        }
      }
      sstatQueue_.pop();
    }

    // make sure this is a copy!
    return smoothedSufficientStats_;
    // lock is freed here
  }

private:
  size_t ct_;
  std::mutex ssmut_;
  std::queue<btree::btree_map<TranscriptID, double>> sstatQueue_;
  btree::btree_map<TranscriptID, std::tuple<double, size_t>>
      smoothedSufficientStats_;
  uint32_t qlen_;
  double logL_;
};

#endif // __SUFFICIENT_STATISTICS_QUEUE_HPP__