File: alignmentdb.cpp

package info (click to toggle)
mothur 1.48.1-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 13,692 kB
  • sloc: cpp: 161,866; makefile: 122; sh: 31
file content (166 lines) | stat: -rwxr-xr-x 5,847 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
/*
 *  alignmentdb.cpp
 *  Mothur
 *
 *  Created by westcott on 11/4/09.
 *  Copyright 2009 Schloss Lab. All rights reserved.
 *
 */

#include "alignmentdb.h"
#include "kmerdb.hpp"
#include "suffixdb.hpp"

/**************************************************************************************************/
//	Builds the template database from a reference file and prepares the search structure.
//
//	The template file is assumed to be in fasta format (this may need to change in the future).
//	Every sequence with a non-empty name is stored in templateSequences; all of them must have
//	the same aligned length, otherwise an error is printed and the control_pressed flag is set.
//
//	fastaFileName - template file to read; also the base for the kmer shortcut file name
//	s             - search method: "kmer" or "suffix"; any other value is reported and replaced by kmer with ksize=8
//	kmerSize      - kmer size handed to KmerDB and used in the shortcut file name
//	gapOpen, gapExtend, match, misMatch - not referenced by this constructor
//	tid           - stored in threadID
//	writeShortcut - for the kmer method, whether generateDB() is called after the sequences were added
//
//	Any std::exception is reported through m->errorOut and the process exits with status 1.
AlignmentDB::AlignmentDB(string fastaFileName, string s, int kmerSize, float gapOpen, float gapExtend, float match, float misMatch, int tid, bool writeShortcut){
	try {
		m = MothurOut::getInstance();
		current = CurrentFile::getInstance();
		longest = 0;
		method = s;
		threadID = tid;
		bool needToGenerate = true;	//stays true unless a usable kmer shortcut file is found below
		Utils util;

		long start = time(nullptr);
		m->mothurOut("\nReading in the " + fastaFileName + " template sequences...\t");	cout.flush();

		int tempLength = 0;	//aligned length of the first template sequence, 0 until one has been read

		ifstream fastaFile; util.openInputFile(fastaFileName, fastaFile);

		while (!fastaFile.eof()) {
			Sequence temp(fastaFile);  gobble(fastaFile);

			if (m->getControl_pressed()) {  templateSequences.clear(); break;  }

			if (temp.getName() != "") {
				templateSequences.push_back(temp);

				//save longest base
				if (temp.getUnaligned().length() >= longest)  { longest = ((int)temp.getUnaligned().length()+1); }

				//every template sequence has to match the aligned length of the first one
				if (tempLength != 0) {
					if (tempLength != temp.getAligned().length()) { m->mothurOut("[ERROR]: template is not aligned, aborting.\n"); m->setControl_pressed(true); }
				}else { tempLength = (int)temp.getAligned().length(); }
			}
		}
		fastaFile.close();

		numSeqs = (int)templateSequences.size();

		m->mothurOut("DONE.\n");
		cout.flush();
		m->mothurOut("It took " + toString(time(nullptr) - start) + " to read  " + toString(templateSequences.size()) + " sequences.\n");

		//returned by findClosestSequence when there is no match
		//(original note: in case you delete the seqs and then ask for them)
		emptySequence = Sequence();
		emptySequence.setName("no_match");
		emptySequence.setUnaligned("XXXXXXXXXXXXXXXXXXXXXXXXXXXXX");
		emptySequence.setAligned("XXXXXXXXXXXXXXXXXXXXXXXXXXXXX");

		string kmerDBName;
		if (method == "kmer") {
			search = new KmerDB(fastaFileName, kmerSize);

			//NOTE(review): char('0' + kmerSize) only produces a digit for sizes 0-9; presumably this has to
			//stay in sync with the name KmerDB itself uses for the shortcut file - confirm before changing
			kmerDBName = fastaFileName.substr(0,fastaFileName.find_last_of(".")+1) + char('0'+ kmerSize) + "mer";

			ifstream kmerFileTest(kmerDBName.c_str());

			if (kmerFileTest) {
				//the first line of the shortcut file is checked against the running version
				string line = util.getline(kmerFileTest);
				bool GoodFile = util.checkReleaseVersion(line, current->getVersion());  kmerFileTest.close();
				int shortcutTimeStamp = util.getTimeStamp(kmerDBName);
				int referenceTimeStamp = util.getTimeStamp(fastaFileName);

				//if the shortcut file is older than the reference file, remake shortcut file
				if (shortcutTimeStamp < referenceTimeStamp) {  GoodFile = false;  }

				if (GoodFile) {  needToGenerate = false;	}
			}
		}
		else if (method == "suffix") {
			search = new SuffixDB(numSeqs);
		}
		else {
			//report the value the caller asked for BEFORE replacing it with the default;
			//previously the message was built after the overwrite and always read "kmer is not a valid search option"
			m->mothurOut(s + " is not a valid search option. I will run the command using kmer, ksize=8.\n");
			method = "kmer";
			search = new KmerDB(fastaFileName, 8);
		}

		if (!m->getControl_pressed()) {
			if (needToGenerate) {
				//add sequences to search
				for (int i = 0; i < templateSequences.size(); i++) {
					search->addSequence(templateSequences[i]);

					if (m->getControl_pressed()) {  templateSequences.clear(); break;  }
				}

				if (m->getControl_pressed()) {  templateSequences.clear();  }

				//suffix always generates; kmer only generates when a shortcut was requested
				if ((method != "kmer") || writeShortcut) { search->generateDB(); }

			}else if (method == "kmer") {
				//needToGenerate is only cleared in the kmer branch above, so the shortcut file exists and was accepted
				ifstream kmerFileTest(kmerDBName.c_str());
				search->readDB(kmerFileTest);
			}

			search->setNumSeqs(numSeqs);
		}

	}
	catch(exception& e) {
		m->errorOut(e, "AlignmentDB", "AlignmentDB");
		exit(1);
	}
}
/**************************************************************************************************/
//	Creates a database of the requested search type without reading any template file.
//
//	s - search method; "suffix" selects a SuffixDB, every other value selects a KmerDB
//
//	Any std::exception is reported through m->errorOut and the process exits with status 1.
AlignmentDB::AlignmentDB(string s){
	try {
		m = MothurOut::getInstance();
		method = s;

		//kmer is the default, suffix has to be asked for explicitly
		if (method != "suffix") {
			search = new KmerDB();
		}
		else {
			search = new SuffixDB();
		}

		//placeholder sequence named "no_match"
		//(original note: in case you delete the seqs and then ask for them)
		const string placeholderBases = "XXXXXXXXXXXXXXXXXXXXXXXXXXXXX";
		emptySequence = Sequence();
		emptySequence.setName("no_match");
		emptySequence.setUnaligned(placeholderBases);
		emptySequence.setAligned(placeholderBases);
	}
	catch(exception& e) {
		m->errorOut(e, "AlignmentDB", "AlignmentDB");
		exit(1);
	}
}
/**************************************************************************************************/
//	Releases the search database that the constructors allocated with new.
AlignmentDB::~AlignmentDB() {
	delete search;
}
/**************************************************************************************************/
//	Asks the search database for the single closest template to seq.
//
//	seq         - query sequence, passed straight to search->findClosestSequences
//	searchScore - out: score of the returned match, or 0 when there is no usable match
//
//	Returns a copy of the matching template sequence. Returns emptySequence when the search
//	finds nothing or when the reported index does not refer to an entry of templateSequences.
//
//	Any std::exception is reported through m->errorOut and the process exits with status 1.
Sequence AlignmentDB::findClosestSequence(Sequence* seq, float& searchScore) const {
	try{
		vector<float> scores;
		vector<int> spot = search->findClosestSequences(seq, 1, scores);

		//templateSequences can be empty even though the search returned a hit (the file-reading
		//constructor clears it on cancel, the string-only constructor never fills it), so the
		//index is validated instead of being used blindly
		bool usableHit = false;
		if (!spot.empty()) {
			usableHit = (spot[0] >= 0) && (spot[0] < static_cast<int>(templateSequences.size()));
		}

		if (!usableHit) {
			searchScore = 0;
			return emptySequence;
		}

		//guard the score lookup as well; a hit without a score is reported as 0
		searchScore = scores.empty() ? 0.0f : scores[0];
		return templateSequences[spot[0]];
	}
	catch(exception& e) {
		m->errorOut(e, "AlignmentDB", "findClosestSequence");
		exit(1);
	}
}
/**************************************************************************************************/