1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111
|
#ifndef __TRANSCRIPT_CLUSTER_HPP__
#define __TRANSCRIPT_CLUSTER_HPP__
#include <iostream>
#include <atomic>
#include <vector>
#include <list>
#include <boost/pending/disjoint_sets.hpp>
#include <boost/dynamic_bitset.hpp>
#include "SalmonMath.hpp"
class ClusterForest;
class TranscriptCluster {
friend class ClusterForest;
public:
TranscriptCluster() : members_(std::list<size_t>()), count_(0), logMass_(salmon::math::LOG_0), active_(true) {}
TranscriptCluster(size_t initialMember) : members_(std::list<size_t>(1,initialMember)), count_(0),
logMass_(salmon::math::LOG_0), active_(true) {
}
//void incrementCount(size_t num) { count_ += num; }
void incrementCount(double num) { count_ += num; }
void addMass(double logNewMass) { logMass_ = salmon::math::logAdd(logMass_, logNewMass); }
void merge(TranscriptCluster& other) {
members_.splice(members_.begin(), other.members_);
logMass_ = salmon::math::logAdd(logMass_, other.logMass_);
count_ += other.count_;
}
std::list<size_t>& members() { return members_; }
//size_t numHits() { return count_.load(); }
double numHits() { return count_; }
bool isActive() { return active_; }
void deactivate() { active_ = false; }
double logMass() { return logMass_; }
// Adapted from https://github.com/adarob/eXpress/blob/master/src/targets.cpp
void projectToPolytope(std::vector<Transcript>& allTranscripts) {
using salmon::math::approxEqual;
// The transcript belonging to this cluster
double clusterCounts{static_cast<double>(count_)};
std::vector<Transcript*> transcripts;
for (auto tid : members_) {
transcripts.push_back(&allTranscripts[tid]);
}
// The cluster size
size_t clusterSize = transcripts.size();
size_t round{0};
boost::dynamic_bitset<> polytopeBound(clusterSize);
while(true) {
double unboundCounts{0.0};
double boundCounts{0.0};
for (size_t i = 0; i < clusterSize; ++i) {
Transcript* transcript = transcripts[i];
if (transcript->projectedCounts > transcript->totalCounts) {
transcript->projectedCounts = transcript->totalCounts;
polytopeBound[i] = true;
} else if (transcript->projectedCounts < transcript->uniqueCounts) {
transcript->projectedCounts = transcript->uniqueCounts;
polytopeBound[i] = true;
}
if (polytopeBound[i]) {
boundCounts += transcript->projectedCounts;
} else {
unboundCounts += transcript->projectedCounts;
}
}
if (approxEqual(unboundCounts + boundCounts, clusterCounts)) {
return;
}
if (unboundCounts == 0) {
polytopeBound = boost::dynamic_bitset<>(clusterSize);
unboundCounts = boundCounts;
boundCounts = 0;
}
double normalizer = (clusterCounts - boundCounts) / unboundCounts;
for (size_t i = 0; i < clusterSize; ++i) {
if (!polytopeBound[i]) {
Transcript* transcript = transcripts[i];
transcript->projectedCounts *= normalizer;
}
}
++round;
if (round > 5000) {
return;
}
} // end while
}
private:
std::list<size_t> members_;
//std::atomic<size_t> count_;
double count_;
double logMass_;
bool active_;
};
#endif // __TRANSCRIPT_CLUSTER_HPP__
|