1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159
|
/*
* blastalign.cpp
*
*
* Created by Pat Schloss on 12/16/08.
* Copyright 2008 Patrick D. Schloss. All rights reserved.
*
* This is a basic alignment method that gets the blast program to do the heavy lifting. In the future, we should
* probably incorporate NCBI's library so that we don't have to call on a user-supplied executable. This is a child
* of the Alignment class, which requires a constructor and align method.
*
*/
#include "alignment.hpp"
#include "blastalign.hpp"
//**************************************************************************************************/
BlastAlignment::BlastAlignment(float go, float ge, float ma, float mm) :
match(ma), // This is the score to award for two nucleotides matching (match >= 0)
mismatch(mm) // This is the penalty to assess for a mismatch (mismatch <= 0)
{
path = m->argv;
path = path.substr(0, (path.find_last_of('m')));
gapOpen = abs(go); // This is the penalty to assess for opening a gap (gapOpen >= 0)
gapExtend = abs(ge); // This is the penalty to assess for extending a gap (gapExtend >= 0)
int randNumber = rand();
candidateFileName = toString(randNumber) + ".candidate";
templateFileName = toString(randNumber) + ".template";
blastFileName = toString(randNumber) + ".pairwise";
}
//**************************************************************************************************/
BlastAlignment::~BlastAlignment(){ // The desctructor should clean up by removing the temporary
m->mothurRemove(candidateFileName); // files used to run bl2seq
m->mothurRemove(templateFileName);
m->mothurRemove(blastFileName);
}
//**************************************************************************************************/
void BlastAlignment::align(string seqA, string seqB){ //Use blastn to align the two sequences
ofstream candidateFile(candidateFileName.c_str()); // Write the sequence to be aligned to a temporary candidate seq file
candidateFile << ">candidate" << endl << seqA << endl;
candidateFile.close();
ofstream templateFile(templateFileName.c_str()); // Write the unaligned template sequence to a temporary candidate seq file
templateFile << ">template" << endl << seqB << endl;
templateFile.close();
// The blastCommand assumes that we have DNA sequences (blastn) and that they are fairly similar (-e 0.001) and
// that we don't want to apply any kind of complexity filtering (-F F)
string blastCommand = path + "blast/bin/bl2seq -p blastn -i " + candidateFileName + " -j " + templateFileName + " -e 0.0001 -F F -o " + blastFileName + " -W 11";
blastCommand += " -r " + toString(match) + " -q " + toString(mismatch);
blastCommand += " -G " + toString(gapOpen) + " -E " + toString(gapExtend);
system(blastCommand.c_str()); // Here we assume that "bl2seq" is in the users path or in the same folder as
// this executable
setPairwiseSeqs();
}
/**************************************************************************************************/
void BlastAlignment::setPairwiseSeqs(){ // This method call assigns the blast generated alignment
// to the pairwise entry in the Sequence class for the
// candidate and template Sequence objects
ifstream blastFile;
m->openInputFile(blastFileName, blastFile);
seqAaln = "";
seqBaln = "";
int candidateLength, templateLength;
char d;
string candidateName, templateName;
while((d=blastFile.get()) != '='){}
blastFile >> candidateName; // Get the candidate sequence name from flatfile
while((d=blastFile.get()) != '('){}
blastFile >> candidateLength; // Get the candidate sequence length from flatfile
while((d=blastFile.get())){
if(d == '>'){
blastFile >> templateName; // Get the template sequence name from flatfile
break;
}
else if(d == '*'){ // We go here if there is no significant match
seqAstart = 0;
seqBstart = 0;
seqAend = 0;
seqBend = 0;
pairwiseLength = 0;
// string dummy;
// while(dummy != "query:"){ m->mothurOut(dummy, ""); m->mothurOutEndLine(); blastFile >> dummy; }
// blastFile >> seqBend;
// m->mothurOut(toString(seqBend), ""); m->mothurOutEndLine();
// for(int i=0;i<seqBend;i++){
// seqAaln += 'Z';
// seqBaln += 'X';
// }
// pairwiseLength = 0;
return;
}
}
while((d=blastFile.get()) != '='){}
blastFile >> templateLength; // Get the template sequence length from flatfile
while((d=blastFile.get()) != 'Q'){} // Suck up everything else until we get to the start of the alignment
int queryStart, sbjctStart, queryEnd, sbjctEnd;
string queryLabel, sbjctLabel, query, sbjct;
blastFile >> queryLabel; queryLabel = 'Q' + queryLabel;
while(queryLabel == "Query:"){
blastFile >> queryStart >> query >> queryEnd;
while((d=blastFile.get()) != 'S'){};
blastFile >> sbjctLabel >> sbjctStart >> sbjct >> sbjctEnd;
if(seqAaln == ""){
seqAstart = queryStart;
seqBstart = sbjctStart;
}
seqAaln += query; // concatenate each line of the sequence to what we already have
seqBaln += sbjct; // for the query and template (subject) sequence
blastFile >> queryLabel;
}
seqAend = queryEnd;
seqBend = sbjctEnd;
pairwiseLength = seqAaln.length();
for(int i=1;i<seqBstart;i++){ // Since the alignments don't always start at (1, 1), we need to pad
seqAaln = 'Z' + seqAaln; // the sequences so that they start at the same point
seqBaln = 'X' + seqBaln;
}
for(int i=seqBend+1;i<=templateLength;i++){ // since the sequences don't necessarily end at the same point, we
seqAaln += 'Z'; // again need ot pad the sequences so that they extend to the length
seqBaln += 'X'; // of the template sequence
}
blastFile.close();
}
//**************************************************************************************************/
|