File: taxonomyequalizer.cpp

package info (click to toggle)
mothur 1.33.3%2Bdfsg-2
  • links: PTS, VCS
  • area: main
  • in suites: jessie, jessie-kfreebsd
  • size: 11,248 kB
  • ctags: 12,231
  • sloc: cpp: 152,046; fortran: 665; makefile: 74; sh: 34
file content (155 lines) | stat: -rw-r--r-- 4,272 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
/*
 *  taxonomyequalizer.cpp
 *  Mothur
 *
 *  Created by westcott on 11/20/09.
 *  Copyright 2009 Schloss Lab. All rights reserved.
 *
 */

#include "taxonomyequalizer.h"

/**************************************************************************************************/
TaxEqualizer::TaxEqualizer(string tfile, int c, string o) : cutoff(c), outputDir(o) {
	try {
		m = MothurOut::getInstance();
		containsConfidence = false;
		
		ifstream inTax;
		m->openInputFile(tfile, inTax);
	
		highestLevel = getHighestLevel(inTax);
		
		if (!m->control_pressed) { 
			
			//if the user has specified a cutoff and it's smaller than the highest level
			if ((cutoff != -1) && (cutoff < highestLevel)) { 
				highestLevel = cutoff;
			}else if (cutoff > highestLevel) {
				m->mothurOut("The highest level taxonomy you have is " + toString(highestLevel) + " and your cutoff is " + toString(cutoff) + ". I will set the cutoff to " + toString(highestLevel));
				m->mothurOutEndLine();
			}
			
			inTax.close(); 
			ifstream in; 
			m->openInputFile(tfile, in);
			
			ofstream out;
			equalizedFile = outputDir + m->getRootName(m->getSimpleName(tfile)) + "equalized.taxonomy";
			m->openOutputFile(equalizedFile, out);
			
	
			string name, tax;
			while (in) {
			
				if (m->control_pressed) {  break; }
				
				in >> name >> tax;   m->gobble(in);
				
				if (containsConfidence) {  m->removeConfidences(tax);	}
				
				//is this a taxonomy that needs to be extended?
				if (seqLevels[name] < highestLevel) {
					extendTaxonomy(name, tax, highestLevel);
				}else if (seqLevels[name] > highestLevel) { //this can happen if the user enters a cutoff
					truncateTaxonomy(name, tax, highestLevel);
				}
				
				out << name << '\t' << tax << endl;
			}
			
			in.close();
			out.close();
			
			if (m->control_pressed) { m->mothurRemove(equalizedFile);  }
		}else { inTax.close(); }
		
	}
	catch(exception& e) {
		m->errorOut(e, "TaxEqualizer", "TaxEqualizer");
		exit(1);
	}
}
/**************************************************************************************************/
int TaxEqualizer::getHighestLevel(ifstream& in) {
	try {
		
		int level = 0;
		string name, tax;
		
		while (in) {
			in >> name >> tax;   m->gobble(in);
		
			//count levels in this taxonomy
			int thisLevel = 0;
			for (int i = 0; i < tax.length(); i++) {
				if (tax[i] == ';') {  thisLevel++;	}
			}
		
			//save sequences level
			seqLevels[name] = thisLevel;
	
			//is this the longest taxonomy?
			if (thisLevel > level) {  
				level = thisLevel;  
				testTax = tax; //testTax is used to figure out if this file has confidences we need to strip out
			} 
		}
		
		int pos = testTax.find_first_of('(');

		//if there are '(' then there are confidences we need to take out
		if (pos != -1) {  containsConfidence = true;  }
	
		return level;
					
	}
	catch(exception& e) {
		m->errorOut(e, "TaxEqualizer", "getHighestLevel");
		exit(1);
	}
}
/**************************************************************************************************/
void TaxEqualizer::extendTaxonomy(string name, string& tax, int desiredLevel) {
	try {
			
		//get last taxon
		tax = tax.substr(0, tax.length()-1);  //take off final ";"
		int pos = tax.find_last_of(';');
		string lastTaxon = tax.substr(pos+1);  
		lastTaxon += ";"; //add back on delimiting char
		tax += ";";
		
		int currentLevel = seqLevels[name];
		
		//added last taxon until you get desired level
		for (int i = currentLevel; i < desiredLevel; i++) {
			tax += lastTaxon;
		}
	}
	catch(exception& e) {
		m->errorOut(e, "TaxEqualizer", "extendTaxonomy");
		exit(1);
	}
}
/**************************************************************************************************/
void TaxEqualizer::truncateTaxonomy(string name, string& tax, int desiredLevel) {
	try {
			
		int currentLevel = seqLevels[name];
		tax = tax.substr(0, tax.length()-1);  //take off final ";"
		
		//remove a taxon until you get to desired level
		for (int i = currentLevel; i > desiredLevel; i--) {
			tax = tax.substr(0,  tax.find_last_of(';'));
		}
		
		tax += ";";
	}
	catch(exception& e) {
		m->errorOut(e, "TaxEqualizer", "truncateTaxonomy");
		exit(1);
	}
}
/**************************************************************************************************/