File: blastalign.cpp

package info (click to toggle)
mothur 1.33.3%2Bdfsg-2
  • links: PTS, VCS
  • area: main
  • in suites: jessie, jessie-kfreebsd
  • size: 11,248 kB
  • ctags: 12,231
  • sloc: cpp: 152,046; fortran: 665; makefile: 74; sh: 34
file content (159 lines) | stat: -rw-r--r-- 5,859 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
/*
 *  blastalign.cpp
 *  
 *
 *  Created by Pat Schloss on 12/16/08.
 *  Copyright 2008 Patrick D. Schloss. All rights reserved.
 *
 *	This is a basic alignment method that gets the blast program to do the heavy lifting.  In the future, we should
 *	probably incorporate NCBI's library so that we don't have to call on a user-supplied executable.  This is a child
 *	of the Alignment class, which requires a constructor and align method.
 *
 */


#include "alignment.hpp"
#include "blastalign.hpp"


//**************************************************************************************************/

BlastAlignment::BlastAlignment(float go, float ge, float ma, float mm) : 
			match(ma),				//	This is the score to award for two nucleotides matching (match >= 0)
			mismatch(mm)			//	This is the penalty to assess for a mismatch (mismatch <= 0)
{
	path = m->argv;
	path = path.substr(0, (path.find_last_of('m')));
	
	gapOpen = abs(go);				//	This is the penalty to assess for opening a gap (gapOpen >= 0)
	gapExtend = abs(ge);				//	This is the penalty to assess for extending a gap (gapExtend >= 0)
		
	int randNumber = rand();
	candidateFileName = toString(randNumber) + ".candidate";
	templateFileName = toString(randNumber) + ".template";
	blastFileName = toString(randNumber) + ".pairwise";
}

//**************************************************************************************************/

BlastAlignment::~BlastAlignment(){		//	The desctructor should clean up by removing the temporary 
	m->mothurRemove(candidateFileName);	//	files used to run bl2seq
	m->mothurRemove(templateFileName);
	m->mothurRemove(blastFileName);
}

//**************************************************************************************************/

void BlastAlignment::align(string seqA, string seqB){	//Use blastn to align the two sequences

	ofstream candidateFile(candidateFileName.c_str());	//	Write the sequence to be aligned to a temporary candidate seq file
	candidateFile << ">candidate" << endl << seqA << endl;
	candidateFile.close();
	
	ofstream templateFile(templateFileName.c_str());	//	Write the unaligned template sequence to a temporary candidate seq file
	templateFile << ">template" << endl << seqB << endl;
	templateFile.close();
	
	//	The blastCommand assumes that we have DNA sequences (blastn) and that they are fairly similar (-e 0.001) and
	//	that we don't want to apply any kind of complexity filtering (-F F)
	string blastCommand = path + "blast/bin/bl2seq -p blastn -i " + candidateFileName + " -j " + templateFileName + " -e 0.0001 -F F -o " + blastFileName + " -W 11";
	blastCommand += " -r " + toString(match) + " -q " + toString(mismatch);
	blastCommand +=	" -G " + toString(gapOpen) + " -E " + toString(gapExtend);
	
	system(blastCommand.c_str());	//	Here we assume that "bl2seq" is in the users path or in the same folder as
									//	this executable
	setPairwiseSeqs();
}

/**************************************************************************************************/

void BlastAlignment::setPairwiseSeqs(){	//	This method call assigns the blast generated alignment
															//	to the pairwise entry in the Sequence class for the 
															//	candidate and template Sequence objects
	ifstream blastFile;
	m->openInputFile(blastFileName, blastFile);
	
	seqAaln = "";
	seqBaln = "";
	
	int candidateLength, templateLength;
	char d;
	
	string candidateName, templateName;
	
	while((d=blastFile.get()) != '='){}
	blastFile >> candidateName;					//	Get the candidate sequence name from flatfile
	
	while((d=blastFile.get()) != '('){}
	blastFile >> candidateLength;				//	Get the candidate sequence length from flatfile
	
	while((d=blastFile.get())){
		if(d == '>'){
			blastFile >> templateName;			//	Get the template sequence name from flatfile
			break;
		}
		else if(d == '*'){									//	We go here if there is no significant match
			
			seqAstart = 0;
			seqBstart = 0;
			seqAend = 0;
			seqBend = 0;
			pairwiseLength = 0;
			
//			string dummy;
//			while(dummy != "query:"){	m->mothurOut(dummy, ""); m->mothurOutEndLine(); blastFile >> dummy;	}
//			blastFile >> seqBend;
//			m->mothurOut(toString(seqBend), ""); m->mothurOutEndLine();
//			for(int i=0;i<seqBend;i++){
//				seqAaln += 'Z';
//				seqBaln += 'X';
//			}
//			pairwiseLength = 0;
			return;
		}
	}
	
	while((d=blastFile.get()) != '='){}
	blastFile >> templateLength;				//	Get the template sequence length from flatfile
		
	while((d=blastFile.get()) != 'Q'){}			//	Suck up everything else until we get to the start of the alignment
	int queryStart, sbjctStart, queryEnd, sbjctEnd;
	string queryLabel, sbjctLabel, query, sbjct;

	blastFile >> queryLabel;	queryLabel = 'Q' + queryLabel;

	
	while(queryLabel == "Query:"){
		blastFile >> queryStart >> query >> queryEnd;
		
		while((d=blastFile.get()) != 'S'){};
		
		blastFile >> sbjctLabel >> sbjctStart >> sbjct >> sbjctEnd;
		
		if(seqAaln == ""){
			seqAstart = queryStart;
			seqBstart = sbjctStart;
		}

		seqAaln += query;					//	concatenate each line of the sequence to what we already have
		seqBaln += sbjct;					//	for the query and template (subject) sequence
		
		blastFile >> queryLabel;
	}
	seqAend = queryEnd;
	seqBend = sbjctEnd;
	pairwiseLength = seqAaln.length();

	for(int i=1;i<seqBstart;i++){				//	Since the alignments don't always start at (1, 1), we need to pad
		seqAaln = 'Z' + seqAaln;				//	the sequences so that they start at the same point
		seqBaln = 'X' + seqBaln;
	}
	
	for(int i=seqBend+1;i<=templateLength;i++){	//	since the sequences don't necessarily end at the same point, we
		seqAaln += 'Z';							//	again need ot pad the sequences so that they extend to the length
		seqBaln += 'X';							//	of the template sequence
	}
	blastFile.close();
}

//**************************************************************************************************/