1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155
|
#ifndef __LIBRARY_TYPE_DETECTOR__
#define __LIBRARY_TYPE_DETECTOR__
#include "spdlog/fmt/ostr.h"
#include "spdlog/fmt/fmt.h"
#include "spdlog/sinks/ostream_sink.h"
#include "spdlog/spdlog.h"
#include "LibraryFormat.hpp"
#include <atomic>
#include <mutex>
class LibraryTypeDetector {
public:
LibraryTypeDetector(ReadType type) : type_(type),
libTypeCounts_(std::vector<std::atomic<uint64_t>>(LibraryFormat::maxLibTypeID() + 1)) {}
LibraryTypeDetector(const LibraryTypeDetector& other) {
active_ = other.active_;
type_ = other.type_;
numSamplesNeeded_.store(other.numSamplesNeeded_.load());
libTypeCounts_ = std::vector<std::atomic<uint64_t>>(LibraryFormat::maxLibTypeID() + 1);
for (size_t i = 0; i < other.libTypeCounts_.size(); ++i) {
libTypeCounts_[i] = other.libTypeCounts_[i].load();
}
}
bool isActive() { return active_; }
bool canGuess() { return numSamplesNeeded_ <= 0; }
bool mostLikelyType(LibraryFormat& ifmt) {
bool ret{false};
if (mut_.try_lock()) {
if (active_) {
if (type_ == ReadType::SINGLE_END) {
uint64_t nf{0};
uint64_t nr{0};
for (size_t i = 0; i <= LibraryFormat::maxLibTypeID(); ++i) {
auto f = LibraryFormat::formatFromID(i);
auto c = libTypeCounts_[i].load();
nf += (f.strandedness == ReadStrandedness::S) ? c : 0;
nr += (f.strandedness == ReadStrandedness::A) ? c : 0;
}
double ratio = -1.0;
if (nf + nr > 0) {
ratio = static_cast<double>(nf) / (nf + nr);
}
ifmt.type = type_;
ifmt.orientation = ReadOrientation::NONE;
// If we have some issue computing this, leave as unstranded
if (ratio < 0.0) {
ifmt.strandedness = ReadStrandedness::U;
} else if (ratio < 0.3) {
// If we map to the forward strand < 30% of the time, we are antisesnse
ifmt.strandedness = ReadStrandedness::A;
} else if (ratio < 0.7) {
// Between 30% and 70% of the time, we are unstranded
ifmt.strandedness = ReadStrandedness::U;
} else {
// Greater than 70% of the time, we are sense
ifmt.strandedness = ReadStrandedness::S;
}
} else { // paired end
uint64_t nsf{0};
uint64_t nsr{0};
uint64_t ninward{0};
uint64_t noutward{0};
uint64_t nsame{0};
for (size_t i = 0; i <= LibraryFormat::maxLibTypeID(); ++i) {
auto f = LibraryFormat::formatFromID(i);
auto c = libTypeCounts_[i].load();
nsf += (f.strandedness == ReadStrandedness::S or
f.strandedness == ReadStrandedness::SA) ? c : 0;
nsr += (f.strandedness == ReadStrandedness::A or
f.strandedness == ReadStrandedness::AS) ? c : 0;
ninward += (f.orientation == ReadOrientation::TOWARD) ? c : 0;
noutward += (f.orientation == ReadOrientation::AWAY) ? c : 0;
nsame += (f.orientation == ReadOrientation::SAME) ? c : 0;
}
ifmt.type = type_;
if ((ninward + noutward + nsame > 0) and
(nsf + nsr > 0)){
auto numOrient = ninward + noutward + nsame;
double ratioIn = static_cast<double>(ninward) / numOrient;
double ratioOut = static_cast<double>(noutward) / numOrient;
double ratioSame = static_cast<double>(nsame) / numOrient;
ifmt.orientation = ReadOrientation::NONE;
bool same{false};
if (ratioIn >= ratioOut and ratioIn >= ratioSame) {
ifmt.orientation = ReadOrientation::TOWARD;
} else if (ratioOut >= ratioIn and ratioOut >= ratioSame) {
ifmt.orientation = ReadOrientation::AWAY;
} else {
ifmt.orientation = ReadOrientation::SAME;
same = true;
}
auto numStrand = nsf + nsr;
double ratioFW = static_cast<double>(nsf) / numStrand;
if (ratioFW < 0.3) {
ifmt.strandedness = (same) ? ReadStrandedness::A : ReadStrandedness::AS;
} else if (ratioFW < 0.7) {
ifmt.strandedness = ReadStrandedness::U;
} else {
ifmt.strandedness = (same) ? ReadStrandedness::S : ReadStrandedness::SA;
}
} else {
ifmt.orientation = ReadOrientation::TOWARD;
ifmt.strandedness = ReadStrandedness::U;
}
} // end paired-end
auto log = spdlog::get("jointLog");
log->info("Automatically detected most likely library type as {}", ifmt.toString());
active_ = false;
ret = true;
} // end if active_
mut_.unlock(); // release the lock
} // end try_lock()
return ret;
}
void addSample(LibraryFormat f) {
if (f.type == type_ and numSamplesNeeded_ >= 0) {
++libTypeCounts_[f.formatID()];
--numSamplesNeeded_;
}
}
private:
// set to false once we have guessed the type
bool active_{true};
std::mutex mut_;
// single or paired-end
ReadType type_;
// number of samples needed before we can guess a type
std::atomic<int64_t> numSamplesNeeded_{50000};
// the counts for each library type
std::vector<std::atomic<uint64_t>> libTypeCounts_;
};
#endif //__LIBRARY_TYPE_DETECTOR__
|