File: alignment.cpp

package info (click to toggle)
mothur 1.48.1-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 13,692 kB
  • sloc: cpp: 161,866; makefile: 122; sh: 31
file content (281 lines) | stat: -rwxr-xr-x 10,869 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
/*
 *  alignment.cpp
 *
 *  Created by Pat Schloss on 12/15/08.
 *  Copyright 2008 Patrick D. Schloss. All rights reserved.
 *
 *  This is a class for an abstract datatype for classes that implement various types of alignment	algorithms.
 *	As of 12/01/21 these included alignments based on needleman-wunsch, and the	Gotoh algorithms
 *  
 */

#include "alignmentcell.hpp"
#include "alignment.hpp"


/**************************************************************************************************/

Alignment::Alignment() {	m = MothurOut::getInstance();  /*	do nothing	*/	}

/**************************************************************************************************/

Alignment::Alignment(int A) : nCols(A), nRows(A) {
	try {
		m = MothurOut::getInstance();
		alignment.resize(nRows);			//	For the Gotoh and Needleman-Wunsch we initialize the dynamic programming
		for(int i=0;i<nRows;i++){			//	matrix by initializing a matrix that is A x A.  By default we will set A
			alignment[i].resize(nCols);		//	at 2000 for 16S rRNA gene sequences
		}	
	}
	catch(exception& e) {
		m->errorOut(e, "Alignment", "Alignment");
		exit(1);
	}
}
/**************************************************************************************************/

Alignment::Alignment(int A, int nk) : nCols(A), nRows(A) {
    try {
        m = MothurOut::getInstance();
        alignment.resize(nRows);			//	For the Gotoh and Needleman-Wunsch we initialize the dynamic programming
        for(int i=0;i<nRows;i++){			//	matrix by initializing a matrix that is A x A.  By default we will set A
            alignment[i].resize(nCols);		//	at 2000 for 16S rRNA gene sequences
        }
    }
    catch(exception& e) {
        m->errorOut(e, "Alignment", "Alignment");
        exit(1);
    }
}
/**************************************************************************************************/
//only gets bigger
void Alignment::resize(int A) {
	try {
		nCols = A;
		nRows = A;

		alignment.resize(nRows);			
		for(int i=0;i<nRows;i++){			
			alignment[i].resize(nCols);
		}
	}
	catch(exception& e) {
		m->errorOut(e, "Alignment", "resize");
		exit(1);
	}
}
/**************************************************************************************************/

void Alignment::traceBack(bool createBaseMap){			//	This traceback routine is used by the dynamic programming algorithms
	try {	
		BBaseMap.clear();
        ABaseMap.clear(); //	to fill the values of seqAaln and seqBaln
		seqAaln = "";
		seqBaln = "";
		int row = lB-1;
		int column = lA-1;
		//	seqAstart = 1;
		//	seqAend = column;
		
		AlignmentCell currentCell = alignment[row][column];	//	Start the traceback from the bottom-right corner of the
		//	matrix
		
		if(currentCell.prevCell == 'x'){	seqAaln = seqBaln = "NOALIGNMENT";		}//If there's an 'x' in the bottom-
		else{	//	right corner bail out because it means nothing got aligned
            int count = 0;
			while(currentCell.prevCell != 'x'){				//	while the previous cell isn't an 'x', keep going...
				
				if(currentCell.prevCell == 'u'){			//	if the pointer to the previous cell is 'u', go up in the
					seqAaln = '-' + seqAaln;				//	matrix.  this indicates that we need to insert a gap in
					seqBaln = seqB[row] + seqBaln;			//	seqA and a base in seqB
                    if (createBaseMap) { BBaseMap[row] = count; }
					//currentCell = alignment[--row][column];
                    --row;
				}
				else if(currentCell.prevCell == 'l'){		//	if the pointer to the previous cell is 'l', go to the left
					seqBaln = '-' + seqBaln;				//	in the matrix.  this indicates that we need to insert a gap
					seqAaln = seqA[column] + seqAaln;		//	in seqB and a base in seqA
                    if (createBaseMap) { ABaseMap[column] = count; }
					//currentCell = alignment[row][--column];
                    --column;
				}
				else{
					seqAaln = seqA[column] + seqAaln;		//	otherwise we need to go diagonally up and to the left,
					seqBaln = seqB[row] + seqBaln;			//	here we add a base to both alignments
                    if (createBaseMap) {
                        BBaseMap[row] = count;
                        ABaseMap[column] = count;
                    }
					//currentCell = alignment[--row][--column];
                    --row; --column;
				}
                if ((row >= 0) && (column >= 0)) { currentCell = alignment[row][column]; }
                else { break; }
                count++;
			}
		}
		
       
        pairwiseLength = seqAaln.length();
		seqAstart = 1;	seqAend = 0;
		seqBstart = 1;	seqBend = 0;
        
        if (createBaseMap) {
            //flip maps since we now know the total length
            map<int, int> newAMap;
            for (map<int, int>::iterator it = ABaseMap.begin(); it != ABaseMap.end(); it++) {
                int spot = it->second;
                newAMap[pairwiseLength-spot-1] = it->first-1;
            }
            ABaseMap = newAMap;
            map<int, int> newBMap;
            for (map<int, int>::iterator it = BBaseMap.begin(); it != BBaseMap.end(); it++) {
                int spot = it->second;
                newBMap[pairwiseLength-spot-1] = it->first-1;
            }
            BBaseMap = newBMap;
        }
        
		for(int i=0;i<seqAaln.length();i++){
			if(seqAaln[i] != '-' && seqBaln[i] == '-')		{	seqAstart++;	}
			else if(seqAaln[i] == '-' && seqBaln[i] != '-')	{	seqBstart++;	}
			else											{	break;			}
		}
		
		pairwiseLength -= (seqAstart + seqBstart - 2);
		
		for(int i=seqAaln.length()-1; i>=0;i--){
			if(seqAaln[i] != '-' && seqBaln[i] == '-')		{	seqAend++;		}
			else if(seqAaln[i] == '-' && seqBaln[i] != '-')	{	seqBend++;		}
			else											{	break;			}
		}
		pairwiseLength -= (seqAend + seqBend);
		
		seqAend = seqA.length() - seqAend - 1;
		seqBend = seqB.length() - seqBend - 1;
	}
	catch(exception& e) {
		m->errorOut(e, "Alignment", "traceBack");
		exit(1);
	}
}
/**************************************************************************************************/
//disables start and end postions and pairwise length
void Alignment::proteinTraceBack(vector<string> seqA, vector<AminoAcid> seqB){            //    This traceback routine is used by the dynamic programming algorithms
    try {
        BBaseMap.clear();
        ABaseMap.clear(); //    to fill the values of seqAaln and seqBaln
        seqAaln = "";
        seqBaln = "";
        int row = lB-1;
        int column = lA-1;
        
        AlignmentCell currentCell = alignment[row][column];    //    Start the traceback from the bottom-right corner of the
        //    matrix
        
        if(currentCell.prevCell == 'x'){    seqAaln = seqBaln = "NOALIGNMENT";        }//If there's an 'x' in the bottom-
        else{    //    right corner bail out because it means nothing got aligned
            int count = 0;
            while(currentCell.prevCell != 'x'){                //    while the previous cell isn't an 'x', keep going...
                
                if(currentCell.prevCell == 'u'){            //    if the pointer to the previous cell is 'u', go up in the
                    seqAaln = "---" + seqAaln;                //    matrix.  this indicates that we need to insert a gap in
                    seqBaln = seqB[row].getAmino() + seqBaln;            //    seqA and a base in seqB
                    --row;
                }
                else if(currentCell.prevCell == 'l'){        //    if the pointer to the previous cell is 'l', go to the left
                    seqBaln = '-' + seqBaln;                //    in the matrix.  this indicates that we need to insert a gap
                    seqAaln = seqA[column] + seqAaln;        //    in seqB and a base in seqA
                    --column;
                }
                else{
                    seqAaln = seqA[column] + seqAaln;        //    otherwise we need to go diagonally up and to the left,
                    seqBaln = seqB[row].getAmino() + seqBaln;            //    here we add a base to both alignments
                    --row; --column;
                }
                if ((row >= 0) && (column >= 0)) { currentCell = alignment[row][column]; }
                else { break; }
                count++;
            }
        }
        
        
        pairwiseLength = 0;
        seqAstart = 0;
        seqBstart = 0;
        seqAend = 0;
        seqBend = 0;
        
    }
    catch(exception& e) {
        m->errorOut(e, "Alignment", "proteinTraceBack");
        exit(1);
    }
}
/**************************************************************************************************/

Alignment::~Alignment(){
	try {
		for (int i = 0; i < alignment.size(); i++) {
			for (int j = (alignment[i].size()-1); j >= 0; j--) {  alignment[i].pop_back();  }
		}
        alignment.clear();
	}
	catch(exception& e) {
		m->errorOut(e, "Alignment", "~Alignment");
		exit(1);
	}
}

/**************************************************************************************************/

string Alignment::getSeqAAln(){
	return seqAaln;										//	this is called to get the alignment of seqA
}

/**************************************************************************************************/

string Alignment::getSeqBAln(){
	return seqBaln;										//	this is called to get the alignment of seqB							
}

/**************************************************************************************************/

int Alignment::getCandidateStartPos(){
	return seqAstart;									//	this is called to report the quality of the alignment
}

/**************************************************************************************************/

int Alignment::getCandidateEndPos(){
	return seqAend;										//	this is called to report the quality of the alignment
}

/**************************************************************************************************/

int Alignment::getTemplateStartPos(){
	return seqBstart;									//	this is called to report the quality of the alignment
}
/**************************************************************************************************/

map<int, int> Alignment::getSeqAAlnBaseMap(){
	return ABaseMap;									
}
/**************************************************************************************************/

map<int, int> Alignment::getSeqBAlnBaseMap(){
	return BBaseMap;									
}
/**************************************************************************************************/

int Alignment::getTemplateEndPos(){
	return seqBend;										//	this is called to report the quality of the alignment
}

/**************************************************************************************************/

int Alignment::getPairwiseLength(){
	return pairwiseLength;								//	this is the pairwise alignment length
}

/**************************************************************************************************/