File: SufficientStatisticsQueue.hpp

package info (click to toggle)
salmon 0.7.2%2Bds1-2
  • links: PTS, VCS
  • area: main
  • in suites: stretch
  • size: 4,352 kB
  • ctags: 5,243
  • sloc: cpp: 42,341; ansic: 6,252; python: 228; makefile: 207; sh: 190
file content (103 lines) | stat: -rw-r--r-- 4,033 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
#ifndef __SUFFICIENT_STATISTICS_QUEUE_HPP__
#define __SUFFICIENT_STATISTICS_QUEUE_HPP__

#include <queue>
#include <cmath>
#include <mutex>
#include <tuple>

#include "btree_map.h"

#include "SalmonMath.hpp"

class SufficientStatisticsQueue {

    using TranscriptID = uint32_t;
    public:
        SufficientStatisticsQueue(uint32_t qlen=10) : ct_(0), qlen_(qlen) {
            if (qlen_ > 0) {
                logL_ = std::log(static_cast<double>(qlen_));
            } else {
                std::cerr << "ERROR: cannot instantiate sufficient statistic queue of length 0";
            }
        }

        bool approxEq(double a, double b) {
            return std::abs(a - b) < 1e-3;
        }

        btree::btree_map<TranscriptID, std::tuple<double,size_t>> getSmoothedStats(
                btree::btree_map<TranscriptID, TranscriptAlignments>& Si ) {
            using salmon::math::logAdd;
            using salmon::math::logSub;

            std::lock_guard<std::mutex> lg(ssmut_);

            ++ct_;
            if (ct_ % 200 == 0) {
                std::cerr << "\n\n sstatQueue_.size() = " << sstatQueue_.size() << ", suf stat map size = " << smoothedSufficientStats_.size() << "\n\n\n";
            }

            sstatQueue_.emplace();
            auto& currentSStat = sstatQueue_.back();

            // S_i^L = S_{i-1}^{L} + (S_{i} / L)
            for (auto kv = Si.begin(); kv != Si.end(); ++kv) {
                auto transcriptID = kv->first;
                auto& hits = kv->second;
                double contrib = hits.totalProb - logL_;
                currentSStat[transcriptID] = contrib;
                auto it = smoothedSufficientStats_.find(transcriptID);
                if (it == smoothedSufficientStats_.end()) {
                    smoothedSufficientStats_[transcriptID] = std::make_tuple(contrib,1);
                } else {
                    std::get<0>(it->second) = logAdd(std::get<0>(it->second), contrib);
                    std::get<1>(it->second) += 1;
                }

            }

            if (sstatQueue_.size() > qlen_) {
                // S_i^L -= (S_{i-L} / L)
                auto& Sil = sstatQueue_.front();
                for (auto kv = Sil.begin(); kv != Sil.end(); ++kv) {
                    auto transcriptID = kv->first;
                    auto& lprob = kv->second;
                    auto it = smoothedSufficientStats_.find(transcriptID);
                    if (it == smoothedSufficientStats_.end()) {
                        //std::cerr << "\n\nWHAT\n\n\n";
                    } else if (std::get<0>(it->second) <= lprob or
                               approxEq(lprob, std::get<0>(it->second)) ) {//std::get<1>(it->second) == 1) {
                        if (!approxEq(std::get<0>(it->second), lprob) and std::get<1>(it->second) > 1) {
                            std::cerr << "hrmmmm .... remaining mass = " << std::get<0>(it->second) << " < lprob = " << lprob << "\n";
                        }
                        smoothedSufficientStats_.erase(it);
                    } else {
                        double before = std::get<0>(it->second);
                        double update = logSub(std::get<0>(it->second), lprob);
                        std::get<0>(it->second) = update;
                        std::get<1>(it->second) -= 1;
                        if (std::get<0>(it->second) == salmon::math::LOG_0) {
                            smoothedSufficientStats_.erase(it);
                        }
                    }
                }
                sstatQueue_.pop();
            }

            // make sure this is a copy!
            return smoothedSufficientStats_;
            // lock is freed here
        }

    private:
        size_t ct_;
        std::mutex ssmut_;
        std::queue<btree::btree_map<TranscriptID, double>> sstatQueue_;
        btree::btree_map<TranscriptID, std::tuple<double, size_t>> smoothedSufficientStats_;
        uint32_t qlen_;
        double logL_;
};

#endif // __SUFFICIENT_STATISTICS_QUEUE_HPP__