1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145
|
/*
* knn.cpp
* Mothur
*
* Created by westcott on 11/4/09.
* Copyright 2009 Schloss Lab. All rights reserved.
*
*/
#include "knn.h"
/**************************************************************************************************/
Knn::Knn(string tfile, string tempFile, string method, int kmerSize, float gapOpen, float gapExtend, float match, float misMatch, int n, int tid, string version)
: Classify(), num(n), search(method) {
try {
threadID = tid;
shortcuts = true;
//create search database and names vector
generateDatabaseAndNames(tfile, tempFile, method, kmerSize, gapOpen, gapExtend, match, misMatch, version);
}
catch(exception& e) {
m->errorOut(e, "Knn", "Knn");
exit(1);
}
}
/**************************************************************************************************/
void Knn::setDistName(string s) {
try {
outDistName = s;
ofstream outDistance;
Utils util; util.openOutputFile(outDistName, outDistance);
outDistance << "Name\tBestMatch\tDistance" << endl;
outDistance.close();
}
catch(exception& e) {
m->errorOut(e, "Knn", "setDistName");
exit(1);
}
}
/**************************************************************************************************/
Knn::~Knn() {
try {
delete phyloTree;
if (database != nullptr) { delete database; }
}
catch(exception& e) {
m->errorOut(e, "Knn", "~Knn");
exit(1);
}
}
/**************************************************************************************************/
string Knn::getTaxonomy(Sequence* seq, string& simpleTax, bool& flipped) {
try {
string tax;
simpleTax = "";
//use database to find closest seq
vector<float> Scores;
vector<int> closest = database->findClosestSequences(seq, num, Scores);
Utils util;
if (search == "distance") { ofstream outDistance; util.openOutputFileAppend(outDistName, outDistance); outDistance << seq->getName() << '\t' << database->getName(closest[0]) << '\t' << Scores[0] << endl; outDistance.close(); }
if (m->getControl_pressed()) { return tax; }
vector<string> closestNames;
for (int i = 0; i < closest.size(); i++) {
//find that sequences taxonomy in map
it = taxonomy.find(names[closest[i]]);
//is this sequence in the taxonomy file
if (it == taxonomy.end()) { //error not in file
m->mothurOut("Error: sequence " + names[closest[i]] + " is not in the taxonomy file. It will be eliminated as a match to sequence " + seq->getName() + ".\n");
}else{ closestNames.push_back(it->first); }
}
if (closestNames.size() == 0) {
m->mothurOut("Error: All the matches for sequence " + seq->getName() + " have been eliminated. \n");
tax = "unknown;";
}else{
tax = findCommonTaxonomy(closestNames);
if (tax == "") { m->mothurOut("There are no common levels for sequence " + seq->getName() + ".\n"); tax = "unknown;"; }
}
simpleTax = tax;
return tax;
}
catch(exception& e) {
m->errorOut(e, "Knn", "getTaxonomy");
exit(1);
}
}
/**************************************************************************************************/
string Knn::findCommonTaxonomy(vector<string> closest) {
try {
string conTax;
//create a tree containing sequences from this bin
PhyloTree p;
for (int i = 0; i < closest.size(); i++) { p.addSeqToTree(closest[i], taxonomy[closest[i]]); }
//build tree
p.assignHeirarchyIDs(0);
TaxNode currentNode = p.get(0);
//at each level
while (currentNode.children.size() != 0) { //you still have more to explore
TaxNode bestChild;
int bestChildSize = 0;
//go through children
for (map<string, int>::iterator itChild = currentNode.children.begin(); itChild != currentNode.children.end(); itChild++) {
TaxNode temp = p.get(itChild->second);
//select child with largest accessions - most seqs assigned to it
if (temp.accessions.size() > bestChildSize) {
bestChild = p.get(itChild->second);
bestChildSize = temp.accessions.size();
}
}
if (bestChildSize == closest.size()) { //if yes, add it
conTax += bestChild.name + ";";
}else{ //if no, quit
break;
}
//move down a level
currentNode = bestChild;
}
return conTax;
}
catch(exception& e) {
m->errorOut(e, "Knn", "findCommonTaxonomy");
exit(1);
}
}
/**************************************************************************************************/
|