File: knn.cpp

package info (click to toggle)
mothur 1.48.5-1
  • links: PTS, VCS
  • area: main
  • in suites: forky
  • size: 13,684 kB
  • sloc: cpp: 161,854; makefile: 122; sh: 31
file content (145 lines) | stat: -rwxr-xr-x 4,435 bytes parent folder | download | duplicates (4)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
/*
 *  knn.cpp
 *  Mothur
 *
 *  Created by westcott on 11/4/09.
 *  Copyright 2009 Schloss Lab. All rights reserved.
 *
 */

#include "knn.h"

/**************************************************************************************************/
Knn::Knn(string tfile, string tempFile, string method, int kmerSize, float gapOpen, float gapExtend, float match, float misMatch, int n, int tid, string version)
: Classify(), num(n), search(method) {
	try {
		threadID = tid;
        shortcuts = true;
		
		//create search database and names vector
		generateDatabaseAndNames(tfile, tempFile, method, kmerSize, gapOpen, gapExtend, match, misMatch, version);
	}
	catch(exception& e) {
		m->errorOut(e, "Knn", "Knn");
		exit(1);
	}
}
/**************************************************************************************************/
void Knn::setDistName(string s) {
	try {
		outDistName = s;
		ofstream outDistance;
        Utils util; util.openOutputFile(outDistName, outDistance);
		outDistance << "Name\tBestMatch\tDistance" << endl;
		outDistance.close();
	}
	catch(exception& e) {
		m->errorOut(e, "Knn", "setDistName");
		exit(1);
	}
}
/**************************************************************************************************/
Knn::~Knn() {
	try {
		 delete phyloTree; 
		 if (database != nullptr) {  delete database; }
	}
	catch(exception& e) {
		m->errorOut(e, "Knn", "~Knn");
		exit(1);
	}
}
/**************************************************************************************************/
string Knn::getTaxonomy(Sequence* seq, string& simpleTax, bool& flipped) {
	try {
		string tax;
		simpleTax = "";
		//use database to find closest seq
        vector<float> Scores;
		vector<int> closest = database->findClosestSequences(seq, num, Scores); 
	
         Utils util;
		if (search == "distance") { ofstream outDistance; util.openOutputFileAppend(outDistName, outDistance); outDistance << seq->getName() << '\t' << database->getName(closest[0]) << '\t' << Scores[0] << endl; outDistance.close();  }
	
		if (m->getControl_pressed()) { return tax; }

		vector<string> closestNames;
		for (int i = 0; i < closest.size(); i++) {
			//find that sequences taxonomy in map
			it = taxonomy.find(names[closest[i]]);
		
			//is this sequence in the taxonomy file
			if (it == taxonomy.end()) { //error not in file
				m->mothurOut("Error: sequence " + names[closest[i]] + " is not in the taxonomy file.  It will be eliminated as a match to sequence " + seq->getName() + ".\n"); 
			}else{   closestNames.push_back(it->first);	}
		}
		
		if (closestNames.size() == 0) {
			m->mothurOut("Error: All the matches for sequence " + seq->getName() + " have been eliminated. \n"); 
			tax = "unknown;";
		}else{
			tax = findCommonTaxonomy(closestNames);
			if (tax == "") { m->mothurOut("There are no common levels for sequence " + seq->getName() + ".\n"); tax = "unknown;"; }
		}
		
		simpleTax = tax;
		return tax;	
	}
	catch(exception& e) {
		m->errorOut(e, "Knn", "getTaxonomy");
		exit(1);
	}
}
/**************************************************************************************************/
string Knn::findCommonTaxonomy(vector<string> closest)  {
	try {
        string conTax;
		
		//create a tree containing sequences from this bin
		PhyloTree p;
		
		for (int i = 0; i < closest.size(); i++) { p.addSeqToTree(closest[i], taxonomy[closest[i]]); }
		
		//build tree
		p.assignHeirarchyIDs(0);
		
		TaxNode currentNode = p.get(0);
		
		//at each level
		while (currentNode.children.size() != 0) { //you still have more to explore
			
			TaxNode bestChild;
			int bestChildSize = 0;
			
			//go through children
			for (map<string, int>::iterator itChild = currentNode.children.begin(); itChild != currentNode.children.end(); itChild++) {
				
				TaxNode temp = p.get(itChild->second);
				
				//select child with largest accessions - most seqs assigned to it
				if (temp.accessions.size() > bestChildSize) {
					bestChild = p.get(itChild->second);
					bestChildSize = temp.accessions.size();
				}
				
			}
			
			if (bestChildSize == closest.size()) { //if yes, add it
				conTax += bestChild.name + ";";
			}else{ //if no, quit
				break;
			}
			
			//move down a level
			currentNode = bestChild;
		}
		
		return conTax;
	}
	catch(exception& e) {
		m->errorOut(e, "Knn", "findCommonTaxonomy");
		exit(1);
	}
}
/**************************************************************************************************/