File: SmithWatermanGotoh.h

package info (click to toggle)
libsmithwaterman 0.0+git20160702.2610e25-11
  • links: PTS, VCS
  • area: main
  • in suites: bullseye, sid
  • size: 296 kB
  • sloc: cpp: 2,115; makefile: 59; sh: 16
file content (103 lines) | stat: -rw-r--r-- 3,970 bytes parent folder | download | duplicates (5)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
#pragma once

#include <iostream>
#include <algorithm>
#include <memory>
//#include "Alignment.h"
#include "Mosaik.h"
#include <stdio.h>
#include <string.h>
#include <sstream>
#include <string>
#include "disorder.h"
#include "Repeats.h"
#include "LeftAlign.h"

using namespace std;

#define MOSAIK_NUM_NUCLEOTIDES 26
#define GAP '-'

class CSmithWatermanGotoh {
public:
    // constructor
    CSmithWatermanGotoh(float matchScore, float mismatchScore, float gapOpenPenalty, float gapExtendPenalty);
    // destructor
    ~CSmithWatermanGotoh(void);
    // aligns the query sequence to the reference using the Smith Waterman Gotoh algorithm
    void Align(unsigned int& referenceAl, string& cigarAl, const string& s1, const string& s2);
    // enables homo-polymer scoring
    void EnableHomoPolymerGapPenalty(float hpGapOpenPenalty);
    // enables non-repeat gap open penalty
    void EnableEntropyGapPenalty(float enGapOpenPenalty);
    // enables repeat gap extension penalty
    void EnableRepeatGapExtensionPenalty(float rGapExtensionPenalty, float rMaxGapRepeatExtensionPenaltyFactor = 10);
    // record the best score for external use
    float BestScore;
private:
    // creates a simple scoring matrix to align the nucleotides and the ambiguity code N
    void CreateScoringMatrix(void);
    // corrects the homopolymer gap order for forward alignments
    void CorrectHomopolymerGapOrder(const unsigned int numBases, const unsigned int numMismatches);
    // returns the maximum floating point number
    static inline float MaxFloats(const float& a, const float& b, const float& c);
    // our simple scoring matrix
    float mScoringMatrix[MOSAIK_NUM_NUCLEOTIDES][MOSAIK_NUM_NUCLEOTIDES];
    // keep track of maximum initialized sizes
    unsigned int mCurrentMatrixSize;
    unsigned int mCurrentAnchorSize;
    unsigned int mCurrentQuerySize;
    unsigned int mCurrentAQSumSize;
    // define our traceback directions
    // N.B. This used to be defined as an enum, but gcc doesn't like being told
    // which storage class to use
    const static char Directions_STOP;
    const static char Directions_LEFT;
    const static char Directions_DIAGONAL;
    const static char Directions_UP;
    // repeat structure determination
    const static int repeat_size_max;
    // define scoring constants
    const float mMatchScore;
    const float mMismatchScore;
    const float mGapOpenPenalty;
    const float mGapExtendPenalty;
    // store the backtrace pointers
    char* mPointers;
    // store the vertical gap sizes - assuming gaps are not longer than 32768 bases long
    short* mSizesOfVerticalGaps;
    // store the horizontal gap sizes - assuming gaps are not longer than 32768 bases long
    short* mSizesOfHorizontalGaps;	
    // score if xi aligns to a gap after yi
    float* mQueryGapScores;
    // best score of alignment x1...xi to y1...yi
    float* mBestScores;
    // our reversed alignment
    char* mReversedAnchor;
    char* mReversedQuery;
    // define static constants
    static const float FLOAT_NEGATIVE_INFINITY;
    // toggles the use of the homo-polymer gap open penalty
    bool mUseHomoPolymerGapOpenPenalty;
    // specifies the homo-polymer gap open penalty
    float mHomoPolymerGapOpenPenalty;
    // toggles the use of the entropy gap open penalty
    bool mUseEntropyGapOpenPenalty;
    // specifies the entropy gap open penalty (multiplier)
    float mEntropyGapOpenPenalty;
    // toggles the use of the repeat gap extension penalty
    bool mUseRepeatGapExtensionPenalty;
    // specifies the repeat gap extension penalty
    float mRepeatGapExtensionPenalty;
    // specifies the max repeat gap extension penalty
    float mMaxRepeatGapExtensionPenalty;
};

// returns the maximum floating point number
inline float CSmithWatermanGotoh::MaxFloats(const float& a, const float& b, const float& c) {
    float max = 0.0f;
    if(a > max) max = a;
    if(b > max) max = b;
    if(c > max) max = c;
    return max;
}