File: BandedSmithWaterman.h

package info (click to toggle)
libsmithwaterman 0.0+git20160702.2610e25-11
  • links: PTS, VCS
  • area: main
  • in suites: bullseye, sid
  • size: 296 kB
  • sloc: cpp: 2,115; makefile: 59; sh: 16
file content (111 lines) | stat: -rw-r--r-- 4,545 bytes parent folder | download | duplicates (5)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
#pragma once

#include <iostream>
#include <algorithm>
#include <memory>
//#include "Alignment.h"
#include "Mosaik.h"
//#include "HashRegion.h"
#include <string.h>
#include <stdio.h>
#include <sstream>
#include <string>

using namespace std;

#define MOSAIK_NUM_NUCLEOTIDES 26
#define GAP '-'

typedef unsigned char DirectionType;
typedef unsigned char PositionType;

struct ElementInfo {
	unsigned int Direction             : 2;
	unsigned int mSizeOfVerticalGaps   : 15;
	unsigned int mSizeOfHorizontalGaps : 15;
};

class CBandedSmithWaterman {
public:
	// constructor
	CBandedSmithWaterman(float matchScore, float mismatchScore, float gapOpenPenalty, float gapExtendPenalty, unsigned int bandWidth);
	// destructor
	~CBandedSmithWaterman(void);
	// aligns the query sequence to the anchor using the Smith Waterman Gotoh algorithm
	void Align(unsigned int& referenceAl, string& stringAl, const string& s1, const string& s2, pair< pair<unsigned int, unsigned int>, pair<unsigned int, unsigned int> >& hr);
	// enables homo-polymer scoring
	void EnableHomoPolymerGapPenalty(float hpGapOpenPenalty);
private:
	// calculates the score during the forward algorithm
	float CalculateScore(const string& s1, const string& s2, const unsigned int rowNum, const unsigned int columnNum, float& currentQueryGapScore, const unsigned int rowOffset, const unsigned int columnOffset);
	// creates a simple scoring matrix to align the nucleotides and the ambiguity code N
	void CreateScoringMatrix(void);
	// corrects the homopolymer gap order for forward alignments
	void CorrectHomopolymerGapOrder(const unsigned int numBases, const unsigned int numMismatches);
	// returns the maximum floating point number
	static inline float MaxFloats(const float& a, const float& b, const float& c);
	// reinitializes the matrices
	void ReinitializeMatrices(const PositionType& positionType, const unsigned int& s1Length, const unsigned int& s2Length, const pair< pair<unsigned int, unsigned int>, pair<unsigned int, unsigned int> > hr);
	// performs the backtrace algorithm
	void Traceback(unsigned int& referenceAl, string& stringAl, const string& s1, const string& s2, unsigned int bestRow, unsigned int bestColumn, const unsigned int rowOffset, const unsigned int columnOffset);
	// updates the best score during the forward algorithm
	inline void UpdateBestScore(unsigned int& bestRow, unsigned int& bestColumn, float& bestScore, const unsigned int rowNum, const unsigned int columnNum, const float score);
	// our simple scoring matrix
	float mScoringMatrix[MOSAIK_NUM_NUCLEOTIDES][MOSAIK_NUM_NUCLEOTIDES];
	// keep track of maximum initialized sizes
	unsigned int mCurrentMatrixSize;
	unsigned int mCurrentAnchorSize;
	unsigned int mCurrentAQSumSize;
	unsigned int mBandwidth;
	// define our backtrace directions
	const static DirectionType Directions_STOP;
	const static DirectionType Directions_LEFT;
	const static DirectionType Directions_DIAGONAL;
	const static DirectionType Directions_UP;
	// store the backtrace pointers
	ElementInfo* mPointers;
	// define our position types
	const static PositionType Position_REF_AND_QUERY_ZERO;
	const static PositionType Position_REF_ZERO;
	const static PositionType Position_QUERY_ZERO;
	const static PositionType Position_REF_AND_QUERO_NONZERO;
	// define scoring constants
	const float mMatchScore;
	const float mMismatchScore;
	const float mGapOpenPenalty;
	const float mGapExtendPenalty;
	// score if xi aligns to a gap after yi
	float* mAnchorGapScores;
	// best score of alignment x1...xi to y1...yi
	float* mBestScores;
	// our reversed alignment
	char* mReversedAnchor;
	char* mReversedQuery;
	// define static constants
	static const float FLOAT_NEGATIVE_INFINITY;
	// toggles the use of the homo-polymer gap open penalty
	bool mUseHomoPolymerGapOpenPenalty;
	float mHomoPolymerGapOpenPenalty;
};

// returns the maximum floating point number
inline float CBandedSmithWaterman::MaxFloats(const float& a, const float& b, const float& c) {
	float max = 0.0f;
	if(a > max) max = a;
	if(b > max) max = b;
	if(c > max) max = c;
	return max;
}

// updates the best score during the forward algorithm
inline void CBandedSmithWaterman::UpdateBestScore(unsigned int& bestRow, unsigned int& bestColumn, float& bestScore, const unsigned int rowNum, const unsigned int columnNum, const float score) {

	//const unsigned int row    = rowNum + rowOffset;
	//const unsigned int column = columnOffset - rowNum + columnNum;

	if(score > bestScore) {
		bestRow    = rowNum;
		bestColumn = columnNum;
		bestScore  = score;
	}
}