1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111
|
#ifndef ALIGNMENT_MODEL
#define ALIGNMENT_MODEL
#include <mutex>
#include <atomic>
#include <memory>
// logger includes
#include "spdlog/spdlog.h"
#include "tbb/concurrent_vector.h"
#include "AtomicMatrix.hpp"
extern "C" {
#include "io_lib/scram.h"
#include "io_lib/os.h"
#undef max
#undef min
}
struct UnpairedRead;
struct ReadPair;
class Transcript;
class AlignmentModel{
public:
AlignmentModel(double alpha, uint32_t readBins = 4);
bool burnedIn();
void burnedIn(bool burnedIn);
/**
* For unpaired reads, update the error model to account
* for errors we've observed in this read pair.
*/
void update(const UnpairedRead&, Transcript& ref, double p, double mass);
/**
* Compute the log-likelihood of the observed unpaired alignment given the
* current error model.
*/
double logLikelihood(const UnpairedRead&, Transcript& ref);
/**
* For paired-end reads, update the error model to account
* for errors we've observed in this read pair.
*/
void update(const ReadPair&, Transcript& ref, double p, double mass);
/**
* Compute the log-likelihood of the observed paire-end alignment given the
* current error model.
*/
double logLikelihood(const ReadPair&, Transcript& ref);
bool hasIndel(UnpairedRead& r);
bool hasIndel(ReadPair & r);
void setLogger(std::shared_ptr<spdlog::logger> logger);
bool hasLogger();
void normalize();
private:
enum AlignmentModelChar {
ALN_A = 0,
ALN_C = 1,
ALN_G = 2,
ALN_T = 3,
ALN_DASH = 4,
ALN_SOFT_CLIP = 5,
ALN_HARD_CLIP = 6,
ALN_PAD = 7,
ALN_REF_SKIP = 8
};
void setBasesFromCIGAROp_(enum cigar_op op, size_t& curRefBase, size_t& curReadBase);
//std::stringstream& readStr, std::stringstream& matchStr,
//std::stringstream& refstr);
// 11 states --- the 9 listed in "AlignmentModelChar" plus START (9) i
// and END (10)
constexpr static uint32_t numAlignmentStates() { return 82; }
constexpr static uint32_t numStates = 9;
constexpr static uint32_t startStateIdx = 81;
/**
* These functions, which work directly on bam_seq_t* types, drive the
* update() and logLikelihood() methods above.
*/
void update(bam_seq_t* read, Transcript& ref, double p, double mass, std::vector<AtomicMatrix<double>>& mismatchProfile);
double logLikelihood(bam_seq_t* read, Transcript& ref, std::vector<AtomicMatrix<double>>& mismatchProfile);
bool hasIndel(bam_seq_t* r);
// NOTE: Do these need to be concurrent_vectors as before?
// Store the mismatch probability tables for the left and right reads
std::vector<AtomicMatrix<double>> transitionProbsLeft_;
std::vector<AtomicMatrix<double>> transitionProbsRight_;
bool isEnabled_;
//size_t maxLen_;
size_t readBins_;
std::atomic<bool> burnedIn_;
std::shared_ptr<spdlog::logger> logger_;
// Maintain a mutex in case the error model wants to talk to the
// console / log.
std::mutex outputMutex_;
};
#endif // ERROR_MODEL
|