/*
* taxonomyequalizer.cpp
* Mothur
*
* Created by westcott on 11/20/09.
* Copyright 2009 Schloss Lab. All rights reserved.
*
*/
#include "taxonomyequalizer.h"
/**************************************************************************************************/
/*
 * Writes an "equalized" copy of a taxonomy file in which every sequence's
 * taxonomy has the same number of levels.
 *
 * The file is read twice. The first pass (getHighestLevel) records each
 * sequence's level count in seqLevels, finds the deepest taxonomy and detects
 * whether the taxonomies carry confidence scores. The second pass writes
 * outputDir + <root name of tfile> + "equalized.taxonomy", extending shorter
 * taxonomies with util.addUnclassifieds and trimming longer ones with
 * util.trimTax.
 *
 * tfile - taxonomy file; each record is read as a name followed by the rest of the line
 * c     - cutoff level; -1 is treated as "no cutoff given"
 * o     - prefix prepended to the output file name
 *
 * If the control-pressed flag is set after the first pass nothing is written;
 * if it is set during the second pass the partial output file is removed.
 */
TaxEqualizer::TaxEqualizer(string tfile, int c, string o) : cutoff(c), outputDir(o) {
    try {
        m = MothurOut::getInstance();
        containsConfidence = false;

        //first pass - find the deepest taxonomy and each sequence's level
        ifstream inTax; util.openInputFile(tfile, inTax);
        highestLevel = getHighestLevel(inTax);
        inTax.close();

        if (m->getControl_pressed()) { return; }

        //if the user has specified a cutoff and it's smaller than the highest level
        if ((cutoff != -1) && (cutoff < highestLevel)) {
            highestLevel = cutoff;
        }else if (cutoff > highestLevel) {
            m->mothurOut("The highest level taxonomy you have is " + toString(highestLevel) + " and your cutoff is " + toString(cutoff) + ". I will set the cutoff to " + toString(highestLevel)+ "\n");
            cutoff = highestLevel; //actually do what the message promises
        }

        //second pass - rewrite every taxonomy at exactly highestLevel levels
        ifstream in; util.openInputFile(tfile, in);

        equalizedFile = outputDir + util.getRootName(util.getSimpleName(tfile)) + "equalized.taxonomy";
        ofstream out; util.openOutputFile(equalizedFile, out);

        string name, tax;
        while (in) {
            if (m->getControl_pressed()) { break; }

            in >> name;
            if (in.fail()) { break; } //nothing left to read (empty or whitespace-only file), don't write a blank record
            gobble(in);

            tax = util.getline(in); gobble(in);

            if (containsConfidence) { util.removeConfidences(tax); }

            const int seqLevel = seqLevels[name]; //level recorded during the first pass

            //is this a taxonomy that needs to be extended?
            if (seqLevel < highestLevel) {
                tax = util.addUnclassifieds(tax, highestLevel, containsConfidence);
            }else if (seqLevel > highestLevel) { //this can happen if the user enters a cutoff
                tax = util.trimTax(tax, highestLevel);
            }

            //'\n' rather than endl: no need to flush after every record, close() flushes
            out << name << '\t' << tax << '\n';
        }
        in.close();
        out.close();

        if (m->getControl_pressed()) { util.mothurRemove(equalizedFile); }
    }
    catch(exception& e) {
        m->errorOut(e, "TaxEqualizer", "TaxEqualizer");
        exit(1);
    }
}
/**************************************************************************************************/
/*
 * Scans a taxonomy stream and returns the largest number of levels found in
 * any taxonomy, where a taxonomy's level is the number of ';' characters in it.
 *
 * Side effects:
 *   seqLevels[name]    - set to the level of every record read
 *   testTax            - set to the deepest taxonomy seen so far
 *   containsConfidence - set to true when the text between the last '(' and the
 *                        last ')' of a new deepest taxonomy passes util.isNumeric1
 *
 * in - open taxonomy stream; each record is read as a name followed by the rest of the line
 *
 * Returns 0 when no record contains a ';' (including an empty stream).
 */
int TaxEqualizer::getHighestLevel(ifstream& in) {
    try {
        int level = 0;
        string name, tax;

        while (in) {
            in >> name;
            if (in.fail()) { break; } //nothing left to read (empty or whitespace-only file), don't record a blank name
            gobble(in);

            tax = util.getline(in); gobble(in);

            //count levels in this taxonomy
            int thisLevel = 0;
            for (string::size_type i = 0; i < tax.length(); i++) { if (tax[i] == ';') { thisLevel++; } }

            //save sequences level
            seqLevels[name] = thisLevel;

            //is this the longest taxonomy?
            if (thisLevel > level) {
                level = thisLevel;
                testTax = tax; //testTax is used to figure out if this file has confidences we need to strip out

                //testTax only changes here, so this is the only place the confidence check needs to run
                if (!containsConfidence) {
                    //keep the positions as size_type so the npos comparison is exact
                    const string::size_type openParen = testTax.find_last_of('(');
                    const string::size_type closeParen = testTax.find_last_of(')');

                    //require a well formed "(...)" - a ')' before the '(' is not a confidence
                    if ((openParen != string::npos) && (closeParen != string::npos) && (closeParen > openParen)) {
                        string confidenceScore = testTax.substr(openParen+1, (closeParen-(openParen+1)));
                        if (util.isNumeric1(confidenceScore)) { //its a confidence
                            containsConfidence = true;
                        }
                    }
                }
            }
        }

        return level;
    }
    catch(exception& e) {
        m->errorOut(e, "TaxEqualizer", "getHighestLevel");
        exit(1);
    }
}
/**************************************************************************************************/