1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145
|
/*
* distancedb.cpp
*
*
* Created by Pat Schloss on 12/29/08.
* Copyright 2008 Patrick D. Schloss. All rights reserved.
*
*/
#include "database.hpp"
#include "sequence.hpp"
#include "distancedb.hpp"
#include "onegapignore.h"
/**************************************************************************************************/
DistanceDB::DistanceDB() : Database() {
try {
templateAligned = true;
templateSeqsLength = 0;
distCalculator = new oneGapIgnoreTermGapDist();
}
catch(exception& e) {
m->errorOut(e, "DistanceDB", "DistanceDB");
exit(1);
}
}
/**************************************************************************************************/
void DistanceDB::addSequence(Sequence seq) {
try {
//are the template sequences aligned
if (!isAligned(seq.getAligned())) {
templateAligned = false;
m->mothurOut(seq.getName() + " is not aligned. Sequences must be aligned to use the distance method.");
m->mothurOutEndLine();
}
if (templateSeqsLength == 0) { templateSeqsLength = seq.getAligned().length(); }
data.push_back(seq);
}
catch(exception& e) {
m->errorOut(e, "DistanceDB", "addSequence");
exit(1);
}
}
/**************************************************************************************************/
//returns indexes to top matches
vector<int> DistanceDB::findClosestSequences(Sequence* query, int numWanted){
try {
vector<int> topMatches;
Scores.clear();
bool templateSameLength = true;
string sequence = query->getAligned();
vector<seqDist> dists;
searchScore = -1.0;
if (numWanted > data.size()){
m->mothurOut("numwanted is larger than the number of template sequences, using "+ toString(data.size()) + ".");
m->mothurOutEndLine();
numWanted = data.size();
}
if (sequence.length() != templateSeqsLength) { templateSameLength = false; }
if (templateSameLength && templateAligned) {
if (numWanted != 1) {
dists.resize(data.size());
//calc distance from this sequence to every sequence in the template
for (int i = 0; i < data.size(); i++) {
distCalculator->calcDist(*query, data[i]);
float dist = distCalculator->getDist();
//save distance to each template sequence
dists[i].seq1 = -1;
dists[i].seq2 = i;
dists[i].dist = dist;
}
sort(dists.begin(), dists.end(), compareSequenceDistance); //sorts by distance lowest to highest
//save distance of best match
searchScore = dists[0].dist;
//fill topmatches with numwanted closest sequences indexes
for (int i = 0; i < numWanted; i++) {
topMatches.push_back(dists[i].seq2);
Scores.push_back(dists[i].dist);
}
}else {
int bestIndex = 0;
float smallDist = 100000;
for (int i = 0; i < data.size(); i++) {
distCalculator->calcDist(*query, data[i]);
float dist = distCalculator->getDist();
//are you smaller?
if (dist < smallDist) {
bestIndex = i;
smallDist = dist;
}
}
searchScore = smallDist;
topMatches.push_back(bestIndex);
Scores.push_back(smallDist);
}
}else{
m->mothurOut("cannot find closest matches using distance method for " + query->getName() + " without aligned template sequences of the same length.");
m->mothurOutEndLine();
exit(1);
}
return topMatches;
}
catch(exception& e) {
m->errorOut(e, "DistanceDB", "findClosestSequence");
exit(1);
}
}
/**************************************************************************************************/
bool DistanceDB::isAligned(string seq){
try {
bool aligned;
int pos = seq.find_first_of(".-");
if (pos != seq.npos) {
aligned = true;
}else { aligned = false; }
return aligned;
}
catch(exception& e) {
m->errorOut(e, "DistanceDB", "isAligned");
exit(1);
}
}
/**************************************************************************************************/
|