File: eachgapignore.cpp

package info (click to toggle)
mothur 1.48.5-1
  • links: PTS, VCS
  • area: main
  • in suites: forky
  • size: 13,684 kB
  • sloc: cpp: 161,854; makefile: 122; sh: 31
file content (117 lines) | stat: -rw-r--r-- 4,458 bytes parent folder | download | duplicates (5)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
//
//  eachgapignore.cpp
//  Mothur
//
//  Created by Sarah Westcott on 4/21/20.
//  Copyright © 2020 Schloss Lab. All rights reserved.
//

#include "eachgapignore.h"

/***********************************************************************/

double eachGapIgnoreTermGapDist::calcDist(Sequence A, Sequence B){
    try {
        string seqA = A.getAligned();
        string seqB = B.getAligned();
        
        int alignLength = seqA.length();
        bool overlap = false;
        
        int start = setStartIgnoreTermGap(seqA, seqB, overlap);
        int end = setEndIgnoreTermGap(seqA, seqB, overlap);
        
        //non-overlapping sequences
        if (!overlap) { return 1.0000; }
        
        int maxMinLength = end - start + 1;
        int diff = 0;
        for(int i=start;i<alignLength;i++){
            if(seqA[i] == '.' || seqB[i] == '.'){ //reached terminal gaps, so quit
                break;
            }
            else if((seqA[i] == '-' && seqB[i] == '-') || (seqA[i] == '-' && seqB[i] == '.') || (seqA[i] == '.' && seqB[i] == '-')){ maxMinLength--; } //comparing gaps, ignore
            else{
                if(seqA[i] != seqB[i]){
                    diff++;
                }
            }
            
            dist = (double)diff / maxMinLength;
            
            if (dist > cutoff) { return 1.0000; }
        }
        
        if(maxMinLength == 0)    {    dist = 1.0000;                                }
        else            {    dist = ((double)diff  / (double)maxMinLength);        }
        
        return dist;
    }
    catch(exception& e) {
        m->errorOut(e, "eachGapIgnoreTermGapDist", "calcDist");
        exit(1);
    }
}
/***********************************************************************/
vector<double> eachGapIgnoreTermGapDist::calcDist(Sequence A, classifierOTU otu, vector<int> cols){ //this function calcs the distance using only the columns provided
    try {
        vector<double> dists; dists.resize(otu.numSeqs, 0.0);
        
        //if you didn't select columns, use all columns
        if (cols.size() == 0) {
            for (int i = 0; i < otu.otuData.size(); i++) { cols.push_back(i); }
        }
        
        classifierOTU seq(A.getAligned());
        vector<int> starts = setStartsIgnoreTermGap(seq, otu, cols);
        vector<int> ends = setEndsIgnoreTermGap(seq, otu, cols);
        
        int alignLength = cols.size();
        
        for (int h = 0; h < otu.numSeqs; h++) {
            
            if (m->getControl_pressed()) { break; }
            
            if ((starts[h] == -1) && (ends[h] == -1)) { dists[h] = 1.0000; } //no overlap
            else {
                
                if (starts[h] == -1)    { starts[h] = 0;    }
                if (ends[h] == -1)      { ends[h] = 0;      }
                
                int maxMinLength = ends[h] - starts[h] + 1;
                int difference = 0;
                
                for(int i=starts[h];i<alignLength;i++){
                    
                    char seqA = seq.otuData[cols[i]][0];
                    vector<char> otuChars = otu.otuData[cols[i]];
                    char seqB = otuChars[0]; //assume column if identical
                    if (otuChars.size() == otu.numSeqs) {  seqB = otuChars[h]; }
                    
                    if(seqA == '.' || seqB == '.'){ i+=alignLength; } //terminal gaps, break;
                    
                    else if((seqA == '-' && seqB == '-') || (seqA == '-' && seqB == '.') || (seqA == '.' && seqB == '-')){ maxMinLength--; } //comparing gaps, ignore
                    else{
                        if(seqA != seqB){ difference++; }
                    }
                    
                    double distance = 1.0;
                    distance = (double)difference / maxMinLength;
                    
                    if (distance > cutoff) { dists[h] = 1.0000;    i+=alignLength;  } //break;
                }
                
                if(maxMinLength == 0)       {    dists[h] = 1.0000;                               }
                else if (dists[h] == 0.0)   {    dists[h] = (double)difference / maxMinLength;    } //not set
            }
        }
            
        return dists;
     }
     catch(exception& e) {
         m->errorOut(e, "eachGapIgnoreTermGapDist", "calcDist");
         exit(1);
     }
 }

/***********************************************************************/