File: TranscriptCluster.hpp

package info (click to toggle)
salmon 0.7.2%2Bds1-2
  • links: PTS, VCS
  • area: main
  • in suites: stretch
  • size: 4,352 kB
  • ctags: 5,243
  • sloc: cpp: 42,341; ansic: 6,252; python: 228; makefile: 207; sh: 190
file content (111 lines) | stat: -rw-r--r-- 3,764 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
#ifndef __TRANSCRIPT_CLUSTER_HPP__
#define __TRANSCRIPT_CLUSTER_HPP__

#include <iostream>
#include <atomic>
#include <vector>
#include <list>

#include <boost/pending/disjoint_sets.hpp>
#include <boost/dynamic_bitset.hpp>

#include "SalmonMath.hpp"

class ClusterForest;

class TranscriptCluster {
    friend class ClusterForest;
public:
    TranscriptCluster() : members_(std::list<size_t>()), count_(0), logMass_(salmon::math::LOG_0), active_(true) {}
    TranscriptCluster(size_t initialMember) : members_(std::list<size_t>(1,initialMember)), count_(0),
                                              logMass_(salmon::math::LOG_0), active_(true) {
    }

    //void incrementCount(size_t num) { count_ += num; }
    void incrementCount(double num) { count_ += num; }
    void addMass(double logNewMass) { logMass_ = salmon::math::logAdd(logMass_, logNewMass); }
    void merge(TranscriptCluster& other) {
        members_.splice(members_.begin(), other.members_);
        logMass_ = salmon::math::logAdd(logMass_, other.logMass_);
        count_ += other.count_;
    }

    std::list<size_t>& members() { return members_; }
    //size_t numHits() { return count_.load(); }
    double numHits() { return count_; }
    bool isActive() { return active_; }
    void deactivate() { active_ = false; }
    double logMass() { return logMass_; }

    // Adapted from https://github.com/adarob/eXpress/blob/master/src/targets.cpp
    void projectToPolytope(std::vector<Transcript>& allTranscripts) {
        using salmon::math::approxEqual;

        // The transcript belonging to this cluster
        double clusterCounts{static_cast<double>(count_)};
        std::vector<Transcript*> transcripts;
        for (auto tid : members_) {
            transcripts.push_back(&allTranscripts[tid]);
        }
        // The cluster size
        size_t clusterSize = transcripts.size();
        size_t round{0};
        boost::dynamic_bitset<> polytopeBound(clusterSize);
        while(true) {
            double unboundCounts{0.0};
            double boundCounts{0.0};
            for (size_t i = 0; i < clusterSize; ++i) {
                Transcript* transcript = transcripts[i];
                if (transcript->projectedCounts > transcript->totalCounts) {
                    transcript->projectedCounts = transcript->totalCounts;
                    polytopeBound[i] = true;
                } else if (transcript->projectedCounts < transcript->uniqueCounts) {
                    transcript->projectedCounts = transcript->uniqueCounts;
                    polytopeBound[i] = true;
                }

                if (polytopeBound[i]) {
                    boundCounts += transcript->projectedCounts;
                } else {
                    unboundCounts += transcript->projectedCounts;
                }
            }


            if (approxEqual(unboundCounts + boundCounts, clusterCounts)) {
                return;
            }

            if (unboundCounts == 0) {
                polytopeBound = boost::dynamic_bitset<>(clusterSize);
                unboundCounts = boundCounts;
                boundCounts = 0;
            }

            double normalizer = (clusterCounts - boundCounts) / unboundCounts;
            for (size_t i = 0; i < clusterSize; ++i) {
                if (!polytopeBound[i]) {
                    Transcript* transcript = transcripts[i];
                    transcript->projectedCounts *= normalizer;
                }
            }
            ++round;
            if (round > 5000) {
                return;
            }

        } // end while

    }

private:
    std::list<size_t> members_;
    //std::atomic<size_t> count_;
    double count_;
    double logMass_;
    bool active_;
};

#endif // __TRANSCRIPT_CLUSTER_HPP__