1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281
|
/*
* alignment.cpp
*
* Created by Pat Schloss on 12/15/08.
* Copyright 2008 Patrick D. Schloss. All rights reserved.
*
* This is a class for an abstract datatype for classes that implement various types of alignment algorithms.
* As of 12/01/21 these included alignments based on needleman-wunsch, and the Gotoh algorithms
*
*/
#include "alignmentcell.hpp"
#include "alignment.hpp"
/**************************************************************************************************/
Alignment::Alignment() { m = MothurOut::getInstance(); /* do nothing */ }
/**************************************************************************************************/
Alignment::Alignment(int A) : nCols(A), nRows(A) {
try {
m = MothurOut::getInstance();
alignment.resize(nRows); // For the Gotoh and Needleman-Wunsch we initialize the dynamic programming
for(int i=0;i<nRows;i++){ // matrix by initializing a matrix that is A x A. By default we will set A
alignment[i].resize(nCols); // at 2000 for 16S rRNA gene sequences
}
}
catch(exception& e) {
m->errorOut(e, "Alignment", "Alignment");
exit(1);
}
}
/**************************************************************************************************/
Alignment::Alignment(int A, int nk) : nCols(A), nRows(A) {
try {
m = MothurOut::getInstance();
alignment.resize(nRows); // For the Gotoh and Needleman-Wunsch we initialize the dynamic programming
for(int i=0;i<nRows;i++){ // matrix by initializing a matrix that is A x A. By default we will set A
alignment[i].resize(nCols); // at 2000 for 16S rRNA gene sequences
}
}
catch(exception& e) {
m->errorOut(e, "Alignment", "Alignment");
exit(1);
}
}
/**************************************************************************************************/
//only gets bigger
void Alignment::resize(int A) {
try {
nCols = A;
nRows = A;
alignment.resize(nRows);
for(int i=0;i<nRows;i++){
alignment[i].resize(nCols);
}
}
catch(exception& e) {
m->errorOut(e, "Alignment", "resize");
exit(1);
}
}
/**************************************************************************************************/
void Alignment::traceBack(bool createBaseMap){ // This traceback routine is used by the dynamic programming algorithms
try {
BBaseMap.clear();
ABaseMap.clear(); // to fill the values of seqAaln and seqBaln
seqAaln = "";
seqBaln = "";
int row = lB-1;
int column = lA-1;
// seqAstart = 1;
// seqAend = column;
AlignmentCell currentCell = alignment[row][column]; // Start the traceback from the bottom-right corner of the
// matrix
if(currentCell.prevCell == 'x'){ seqAaln = seqBaln = "NOALIGNMENT"; }//If there's an 'x' in the bottom-
else{ // right corner bail out because it means nothing got aligned
int count = 0;
while(currentCell.prevCell != 'x'){ // while the previous cell isn't an 'x', keep going...
if(currentCell.prevCell == 'u'){ // if the pointer to the previous cell is 'u', go up in the
seqAaln = '-' + seqAaln; // matrix. this indicates that we need to insert a gap in
seqBaln = seqB[row] + seqBaln; // seqA and a base in seqB
if (createBaseMap) { BBaseMap[row] = count; }
//currentCell = alignment[--row][column];
--row;
}
else if(currentCell.prevCell == 'l'){ // if the pointer to the previous cell is 'l', go to the left
seqBaln = '-' + seqBaln; // in the matrix. this indicates that we need to insert a gap
seqAaln = seqA[column] + seqAaln; // in seqB and a base in seqA
if (createBaseMap) { ABaseMap[column] = count; }
//currentCell = alignment[row][--column];
--column;
}
else{
seqAaln = seqA[column] + seqAaln; // otherwise we need to go diagonally up and to the left,
seqBaln = seqB[row] + seqBaln; // here we add a base to both alignments
if (createBaseMap) {
BBaseMap[row] = count;
ABaseMap[column] = count;
}
//currentCell = alignment[--row][--column];
--row; --column;
}
if ((row >= 0) && (column >= 0)) { currentCell = alignment[row][column]; }
else { break; }
count++;
}
}
pairwiseLength = seqAaln.length();
seqAstart = 1; seqAend = 0;
seqBstart = 1; seqBend = 0;
if (createBaseMap) {
//flip maps since we now know the total length
map<int, int> newAMap;
for (map<int, int>::iterator it = ABaseMap.begin(); it != ABaseMap.end(); it++) {
int spot = it->second;
newAMap[pairwiseLength-spot-1] = it->first-1;
}
ABaseMap = newAMap;
map<int, int> newBMap;
for (map<int, int>::iterator it = BBaseMap.begin(); it != BBaseMap.end(); it++) {
int spot = it->second;
newBMap[pairwiseLength-spot-1] = it->first-1;
}
BBaseMap = newBMap;
}
for(int i=0;i<seqAaln.length();i++){
if(seqAaln[i] != '-' && seqBaln[i] == '-') { seqAstart++; }
else if(seqAaln[i] == '-' && seqBaln[i] != '-') { seqBstart++; }
else { break; }
}
pairwiseLength -= (seqAstart + seqBstart - 2);
for(int i=seqAaln.length()-1; i>=0;i--){
if(seqAaln[i] != '-' && seqBaln[i] == '-') { seqAend++; }
else if(seqAaln[i] == '-' && seqBaln[i] != '-') { seqBend++; }
else { break; }
}
pairwiseLength -= (seqAend + seqBend);
seqAend = seqA.length() - seqAend - 1;
seqBend = seqB.length() - seqBend - 1;
}
catch(exception& e) {
m->errorOut(e, "Alignment", "traceBack");
exit(1);
}
}
/**************************************************************************************************/
//disables start and end postions and pairwise length
void Alignment::proteinTraceBack(vector<string> seqA, vector<AminoAcid> seqB){ // This traceback routine is used by the dynamic programming algorithms
try {
BBaseMap.clear();
ABaseMap.clear(); // to fill the values of seqAaln and seqBaln
seqAaln = "";
seqBaln = "";
int row = lB-1;
int column = lA-1;
AlignmentCell currentCell = alignment[row][column]; // Start the traceback from the bottom-right corner of the
// matrix
if(currentCell.prevCell == 'x'){ seqAaln = seqBaln = "NOALIGNMENT"; }//If there's an 'x' in the bottom-
else{ // right corner bail out because it means nothing got aligned
int count = 0;
while(currentCell.prevCell != 'x'){ // while the previous cell isn't an 'x', keep going...
if(currentCell.prevCell == 'u'){ // if the pointer to the previous cell is 'u', go up in the
seqAaln = "---" + seqAaln; // matrix. this indicates that we need to insert a gap in
seqBaln = seqB[row].getAmino() + seqBaln; // seqA and a base in seqB
--row;
}
else if(currentCell.prevCell == 'l'){ // if the pointer to the previous cell is 'l', go to the left
seqBaln = '-' + seqBaln; // in the matrix. this indicates that we need to insert a gap
seqAaln = seqA[column] + seqAaln; // in seqB and a base in seqA
--column;
}
else{
seqAaln = seqA[column] + seqAaln; // otherwise we need to go diagonally up and to the left,
seqBaln = seqB[row].getAmino() + seqBaln; // here we add a base to both alignments
--row; --column;
}
if ((row >= 0) && (column >= 0)) { currentCell = alignment[row][column]; }
else { break; }
count++;
}
}
pairwiseLength = 0;
seqAstart = 0;
seqBstart = 0;
seqAend = 0;
seqBend = 0;
}
catch(exception& e) {
m->errorOut(e, "Alignment", "proteinTraceBack");
exit(1);
}
}
/**************************************************************************************************/
Alignment::~Alignment(){
try {
for (int i = 0; i < alignment.size(); i++) {
for (int j = (alignment[i].size()-1); j >= 0; j--) { alignment[i].pop_back(); }
}
alignment.clear();
}
catch(exception& e) {
m->errorOut(e, "Alignment", "~Alignment");
exit(1);
}
}
/**************************************************************************************************/
string Alignment::getSeqAAln(){
return seqAaln; // this is called to get the alignment of seqA
}
/**************************************************************************************************/
string Alignment::getSeqBAln(){
return seqBaln; // this is called to get the alignment of seqB
}
/**************************************************************************************************/
int Alignment::getCandidateStartPos(){
return seqAstart; // this is called to report the quality of the alignment
}
/**************************************************************************************************/
int Alignment::getCandidateEndPos(){
return seqAend; // this is called to report the quality of the alignment
}
/**************************************************************************************************/
int Alignment::getTemplateStartPos(){
return seqBstart; // this is called to report the quality of the alignment
}
/**************************************************************************************************/
map<int, int> Alignment::getSeqAAlnBaseMap(){
return ABaseMap;
}
/**************************************************************************************************/
map<int, int> Alignment::getSeqBAlnBaseMap(){
return BBaseMap;
}
/**************************************************************************************************/
int Alignment::getTemplateEndPos(){
return seqBend; // this is called to report the quality of the alignment
}
/**************************************************************************************************/
int Alignment::getPairwiseLength(){
return pairwiseLength; // this is the pairwise alignment length
}
/**************************************************************************************************/
|