File: AlignmentModel.hpp

package info (click to toggle)
salmon 0.7.2%2Bds1-2
  • links: PTS, VCS
  • area: main
  • in suites: stretch
  • size: 4,352 kB
  • ctags: 5,243
  • sloc: cpp: 42,341; ansic: 6,252; python: 228; makefile: 207; sh: 190
file content (111 lines) | stat: -rw-r--r-- 3,171 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
#ifndef ALIGNMENT_MODEL
#define ALIGNMENT_MODEL

#include <mutex>
#include <atomic>
#include <memory>

// logger includes
#include "spdlog/spdlog.h"

#include "tbb/concurrent_vector.h"
#include "AtomicMatrix.hpp"

extern "C" {
#include "io_lib/scram.h"
#include "io_lib/os.h"
#undef max
#undef min
}

struct UnpairedRead;
struct ReadPair;
class Transcript;

class AlignmentModel{
public:
    AlignmentModel(double alpha, uint32_t readBins = 4);
    bool burnedIn();
    void burnedIn(bool burnedIn);

    /**
    *  For unpaired reads, update the error model to account
    *  for errors we've observed in this read pair.
    */
    void update(const UnpairedRead&, Transcript& ref, double p, double mass);

    /**
      * Compute the log-likelihood of the observed unpaired alignment given the
      * current error model.
      */
    double logLikelihood(const UnpairedRead&, Transcript& ref);

    /**
    *  For paired-end reads, update the error model to account
    *  for errors we've observed in this read pair.
    */
    void update(const ReadPair&, Transcript& ref, double p, double mass);

    /**
     * Compute the log-likelihood of the observed paire-end alignment given the
     * current error model.
     */
    double logLikelihood(const ReadPair&, Transcript& ref);


    bool hasIndel(UnpairedRead& r);
    bool hasIndel(ReadPair & r);

    void setLogger(std::shared_ptr<spdlog::logger> logger);
    bool hasLogger();

    void normalize();

private:

    enum AlignmentModelChar {
        ALN_A = 0,
        ALN_C = 1,
        ALN_G = 2,
        ALN_T = 3,
        ALN_DASH = 4,
        ALN_SOFT_CLIP = 5,
        ALN_HARD_CLIP = 6,
        ALN_PAD = 7,
        ALN_REF_SKIP = 8
    };

    void setBasesFromCIGAROp_(enum cigar_op op, size_t& curRefBase, size_t& curReadBase);
                              //std::stringstream& readStr, std::stringstream& matchStr,
                              //std::stringstream& refstr);

    // 11 states --- the 9 listed in "AlignmentModelChar" plus START (9) i
    // and END (10)
    constexpr static uint32_t numAlignmentStates() { return 82; }
    constexpr static uint32_t numStates = 9;
    constexpr static uint32_t startStateIdx = 81;

    /**
     * These functions, which work directly on bam_seq_t* types, drive the
     * update() and logLikelihood() methods above.
     */
    void update(bam_seq_t* read, Transcript& ref, double p, double mass, std::vector<AtomicMatrix<double>>& mismatchProfile);
    double logLikelihood(bam_seq_t* read, Transcript& ref, std::vector<AtomicMatrix<double>>& mismatchProfile);
    bool hasIndel(bam_seq_t* r);

    // NOTE: Do these need to be concurrent_vectors as before?
    // Store the mismatch probability tables for the left and right reads
    std::vector<AtomicMatrix<double>> transitionProbsLeft_;
    std::vector<AtomicMatrix<double>> transitionProbsRight_;

    bool isEnabled_;
    //size_t maxLen_;
    size_t readBins_;
    std::atomic<bool> burnedIn_;
    std::shared_ptr<spdlog::logger> logger_;
    // Maintain a mutex in case the error model wants to talk to the
    // console / log.
    std::mutex outputMutex_;
};

#endif // ERROR_MODEL