1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85
|
/*
* suffixdb.cpp
*
*
* Created by Pat Schloss on 12/16/08.
* Copyright 2008 Patrick D. Schloss. All rights reserved.
*
* This is a child class of the Database abstract datatype. The class is basically a database of suffix trees and an
* encapsulation of the method for finding the most similar tree to an inputted sequence. the suffixForest objecct
* is a vector of SuffixTrees, with each template sequence being represented by a different SuffixTree. The class also
* provides a method to take an unaligned sequence and find the closest sequence in the suffixForest. The search
* method is inspired by the article and Perl source code provided at http://www.ddj.com/web-development/184416093. I
* would estimate that the time complexity is O(LN) for each search, which is slower than the kmer searching, but
* faster than blast
*
*/
#include "database.hpp"
#include "sequence.hpp"
#include "suffixtree.hpp"
#include "suffixdb.hpp"
/**************************************************************************************************/
SuffixDB::SuffixDB(int numSeqs) : Database() {
suffixForest.resize(numSeqs);
count = 0;
}
/**************************************************************************************************/
SuffixDB::SuffixDB() : Database() {
count = 0;
}
/**************************************************************************************************/
//assumes sequences have been added using addSequence
vector<int> SuffixDB::findClosestSequences(Sequence* candidateSeq, int num){
try {
vector<int> topMatches;
string processedSeq = candidateSeq->convert2ints(); // the candidate sequence needs to be a string of ints
vector<seqMatch> seqMatches;
for(int i=0;i<suffixForest.size();i++){ // scan through the forest and see what the minimum
int count = suffixForest[i].countSuffixes(processedSeq); // return score is and keep track of the
seqMatch temp(i, count);
seqMatches.push_back(temp);
}
//sorts putting smallest matches first
sort(seqMatches.begin(), seqMatches.end(), compareSeqMatchesReverse);
searchScore = seqMatches[0].match;
searchScore = 100 * (1. - searchScore / (float)processedSeq.length());
//save top matches
for (int i = 0; i < num; i++) {
topMatches.push_back(seqMatches[i].seq);
}
// return the Sequence object that has the minimum score
return topMatches;
}
catch(exception& e) {
m->errorOut(e, "SuffixDB", "findClosestSequences");
exit(1);
}
}
/**************************************************************************************************/
//adding the sequences generates the db
void SuffixDB::addSequence(Sequence seq) {
try {
suffixForest[count].loadSequence(seq);
count++;
}
catch(exception& e) {
m->errorOut(e, "SuffixDB", "addSequence");
exit(1);
}
}
/**************************************************************************************************/
SuffixDB::~SuffixDB(){
for (int i = (suffixForest.size()-1); i >= 0; i--) { suffixForest.pop_back(); }
}
/**************************************************************************************************/
|