File: rftreenode.cpp

package info (click to toggle)
mothur 1.33.3%2Bdfsg-2
  • links: PTS, VCS
  • area: main
  • in suites: jessie, jessie-kfreebsd
  • size: 11,248 kB
  • ctags: 12,231
  • sloc: cpp: 152,046; fortran: 665; makefile: 74; sh: 34
file content (102 lines) | stat: -rw-r--r-- 4,128 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
//
//  rftreenode.cpp
//  Mothur
//
//  Created by Sarah Westcott on 10/2/12.
//  Copyright (c) 2012 Schloss Lab. All rights reserved.
//

#include "rftreenode.hpp"

/***********************************************************************/
RFTreeNode::RFTreeNode(vector< vector<int> > bootstrappedTrainingSamples,
                       vector<int> globalDiscardedFeatureIndices,
                       int numFeatures,
                       int numSamples,
                       int numOutputClasses,
                       int generation,
                       int nodeId,
                       float featureStandardDeviationThreshold)

            : bootstrappedTrainingSamples(bootstrappedTrainingSamples),
            globalDiscardedFeatureIndices(globalDiscardedFeatureIndices),
            numFeatures(numFeatures),
            numSamples(numSamples),
            numOutputClasses(numOutputClasses),
            generation(generation),
            isLeaf(false),
            outputClass(-1),
            nodeId(nodeId),
            testSampleMisclassificationCount(0),
            splitFeatureIndex(-1),
            splitFeatureValue(-1),
            splitFeatureEntropy(-1.0),
            ownEntropy(-1.0),
            featureStandardDeviationThreshold(featureStandardDeviationThreshold),
            bootstrappedFeatureVectors(numFeatures, vector<int>(numSamples, 0)),
            bootstrappedOutputVector(numSamples, 0),
            leftChildNode(NULL),
            rightChildNode(NULL),
            parentNode(NULL) {
                
    m = MothurOut::getInstance();
    
    for (int i = 0; i < numSamples; i++) {    // just doing a simple transpose of the matrix
        if (m->control_pressed) { break; }
        for (int j = 0; j < numFeatures; j++) { bootstrappedFeatureVectors[j][i] = bootstrappedTrainingSamples[i][j]; }
    }
    
    for (int i = 0; i < numSamples; i++) { if (m->control_pressed) { break; }
        bootstrappedOutputVector[i] = bootstrappedTrainingSamples[i][numFeatures]; }
    
    createLocalDiscardedFeatureList();
    updateNodeEntropy();
}
/***********************************************************************/
int RFTreeNode::createLocalDiscardedFeatureList(){
    try {
        
        for (int i = 0; i < numFeatures; i++) {
                // TODO: need to check if bootstrappedFeatureVectors == numFeatures, in python code we are using bootstrappedFeatureVectors instead of numFeatures
            if (m->control_pressed) { return 0; } 
            vector<int>::iterator it = find(globalDiscardedFeatureIndices.begin(), globalDiscardedFeatureIndices.end(), i);
            if (it == globalDiscardedFeatureIndices.end()) {                           // NOT FOUND
                double standardDeviation = m->getStandardDeviation(bootstrappedFeatureVectors[i]);  
                if (standardDeviation <= featureStandardDeviationThreshold) { localDiscardedFeatureIndices.push_back(i); }
            }
        }
        
        return 0;
    }
    catch(exception& e) {
        m->errorOut(e, "RFTreeNode", "createLocalDiscardedFeatureList");
        exit(1);
    }  
}
/***********************************************************************/
int RFTreeNode::updateNodeEntropy() {
    try {
        
        vector<int> classCounts(numOutputClasses, 0);
        for (int i = 0; i < bootstrappedOutputVector.size(); i++) {
            classCounts[bootstrappedOutputVector[i]]++;
        }
        int totalClassCounts = accumulate(classCounts.begin(), classCounts.end(), 0);
        double nodeEntropy = 0.0;
        for (int i = 0; i < classCounts.size(); i++) {
            if (m->control_pressed) { return 0; }
            if (classCounts[i] == 0) continue;
            double probability = (double)classCounts[i] / (double)totalClassCounts;
            nodeEntropy += -(probability * log2(probability));
        }
        ownEntropy = nodeEntropy;
        
        return 0;
    }
    catch(exception& e) {
        m->errorOut(e, "RFTreeNode", "updateNodeEntropy");
        exit(1);
    } 
}

/***********************************************************************/