File: ProbabilityDistance.h

package info (click to toggle)
snap-aligner 1.0.0%2Bdfsg-2
  • links: PTS, VCS
  • area: main
  • in suites: bullseye
  • size: 4,988 kB
  • sloc: cpp: 36,500; ansic: 5,239; python: 227; makefile: 85; sh: 28
file content (56 lines) | stat: -rw-r--r-- 2,167 bytes parent folder | download | duplicates (5)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
#pragma once

#include "Read.h"

//
// Similar to BoundedStringDistance and LandauVishkin, but computes the probability of a read
// string being generated from a reference sequence given an error model, mutation model and
// the quality scores of the bases in the read. For now, we assume that the statistical model
// for errors is one with a "gap open probability" and a fixed "gap extension probability"
// per base. For substitions, we could have different probabilities for each transition, but
// we assume that they all have the same probability right now.
//
class ProbabilityDistance {
public:
    static const int MAX_READ = MAX_READ_LENGTH;
    static const int MAX_SHIFT = 20;

    ProbabilityDistance(double snpProb, double gapOpenProb, double gapExtensionProb);

    int compute(
            const char *reference,
            const char *read,
            const char *quality,
            int readLen,
            int maxStartShift,
            int maxTotalShift,
            double *matchProbability);

private:
    double snpLogProb;
    double gapOpenLogProb;
    double gapExtensionLogProb;

    double matchLogProb[256];      // [baseQuality]
    double mismatchLogProb[256];   // [baseQuality]

#define NO_PROB  -1000000.0;  // A really negative log probability -- basically zero.  VC compiler won't allow static const double in a class.

    enum GapStatus { NO_GAP, READ_GAP, REF_GAP };

    // d[readPos][shift][gapStatus] is the best possible log probability for aligning the
    // substring read[0..readPos] to reference[?..readPos + shift]. The "?" in reference is
    // because we allow starting an alignment from reference[-maxStartShift..maxStartShift]
    // instead of just reference[0], to deal with indels toward the start of the read.
    double d[MAX_READ][2*MAX_SHIFT+1][3];   // [readPos][shift][gapStatus]

    // A state in the D array, used for backtracking pointers
    struct State {
        int readPos;
        int shift;
        int gapStatus;
    };

    // Previous state in our dynamic program, for backtracking to print CIGAR strings
    State prev[MAX_READ][2*MAX_SHIFT+1][3];   // [readPos][shift][gapStatus]
};