1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155
|
//
// onegapdist.cpp
// Mothur
//
// Created by Sarah Westcott on 3/27/20.
// Copyright © 2020 Schloss Lab. All rights reserved.
//
#include "onegapdist.h"
/***********************************************************************/
double oneGapDist::calcDist(Sequence A, Sequence B){
try {
int difference = 0;
bool openGapA = false;
bool openGapB = false;
string seqA = A.getAligned();
string seqB = B.getAligned();
int alignLength = seqA.length();
int start = setStart(seqA, seqB);
int end = setEnd(seqA, seqB);
int maxMinLength = end - start + 1;
for(int i=start;i<alignLength;i++){
//comparing gaps, ignore
if((seqA[i] == '-' && seqB[i] == '-') || (seqA[i] == '.' && seqB[i] == '-') || (seqA[i] == '-' && seqB[i] == '.')){ maxMinLength--; }
//trailing gaps, quit we already calculated all the diffs
else if(seqA[i] == '.' && seqB[i] == '.'){ break; }
else if(seqB[i] != '-' && (seqA[i] == '-' || seqA[i] == '.')){ //seqB is a base, seqA is a gap
if(!openGapA){
difference++;
openGapA = true;
openGapB = false;
}else { maxMinLength--; }
}
else if(seqA[i] != '-' && (seqB[i] == '-' || seqB[i] == '.')){ //seqA is a base, seqB is a gap
if(!openGapB){
difference++;
openGapA = false;
openGapB = true;
}else { maxMinLength--; }
}
else if(seqA[i] != '-' && seqB[i] != '-'){ //both bases
openGapA = false;
openGapB = false;
//no match
if(seqA[i] != seqB[i]){ difference++; }
}
dist = (double)difference / maxMinLength;
if (dist > cutoff) { return 1.0000; }
}
if(maxMinLength == 0) { dist = 1.0000; }
else { dist = (double)difference / maxMinLength; }
return dist;
}
catch(exception& e) {
m->errorOut(e, "oneGapDist", "calcDist");
exit(1);
}
}
/***********************************************************************/
vector<double> oneGapDist::calcDist(Sequence A, classifierOTU otu, vector<int> cols){ //this function calcs the distance using only the columns provided
try {
vector<double> dists; dists.resize(otu.numSeqs, 0.0);
//if you didn't select columns, use all columns
if (cols.size() == 0) {
for (int i = 0; i < otu.otuData.size(); i++) { cols.push_back(i); }
}
classifierOTU seq(A.getAligned());
vector<int> starts = setStarts(seq, otu, cols);
vector<int> ends = setEnds(seq, otu, cols);
int alignLength = cols.size();
for (int h = 0; h < otu.numSeqs; h++) {
if (m->getControl_pressed()) { break; }
int maxMinLength = ends[h] - starts[h] + 1;
int difference = 0;
bool openGapA = false;
bool openGapB = false;
for(int i=starts[h];i<alignLength;i++){
char seqA = seq.otuData[cols[i]][0];
vector<char> otuChars = otu.otuData[cols[i]];
char seqB = otuChars[0]; //assume column if identical
if (otuChars.size() == otu.numSeqs) { seqB = otuChars[h]; }
if((seqA == '-' && seqB == '-') || (seqA == '.' && seqB == '-') || (seqA == '-' && seqB == '.')){ maxMinLength--; }
//trailing gaps, quit we already calculated all the diffs
else if(seqA == '.' && seqB == '.'){ i+=alignLength; } //break;
else if(seqB != '-' && (seqA == '-' || seqA == '.')){ //seqB is a base, seqA is a gap
if(!openGapA){
difference++;
openGapA = true;
openGapB = false;
}else { maxMinLength--; }
}
else if(seqA != '-' && (seqB == '-' || seqB == '.')){ //seqA is a base, seqB is a gap
if(!openGapB){
difference++;
openGapA = false;
openGapB = true;
}else { maxMinLength--; }
}
else if(seqA != '-' && seqB != '-'){ //both bases
openGapA = false;
openGapB = false;
//no match
if(seqA != seqB){ difference++; }
}
double distance = 1.0;
distance = (double)difference / maxMinLength;
if (distance > cutoff) { dists[h] = 1.0000; i+=alignLength; } //break;
}
if(maxMinLength == 0) { dists[h] = 1.0000; }
else if (dists[h] == 0.0) { dists[h] = (double)difference / maxMinLength; } //not set
}
return dists;
}
catch(exception& e) {
m->errorOut(e, "oneGapDist", "calcDist");
exit(1);
}
}
/***********************************************************************/
|