File: Ktree.cpp

package info (click to toggle)
trinityrnaseq 2.11.0%2Bdfsg-6
  • links: PTS, VCS
  • area: main
  • in suites: bookworm, bullseye, sid
  • size: 417,528 kB
  • sloc: perl: 48,420; cpp: 17,749; java: 12,695; python: 3,124; sh: 1,030; ansic: 983; makefile: 688; xml: 62
file content (134 lines) | stat: -rw-r--r-- 3,207 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
#include "Ktree.hpp"
#include <iostream>
#include <sstream>


Ktree::Ktree() {

    ktree_node_counter = 0;
    
    KtreeNode root_node ('_', 0);
    ktree_node_list.push_back(root_node);

}


KtreeNode& Ktree::get_root_node() {
    
    return(ktree_node_list[0]);
}

void Ktree::add_kmer (string kmer) {
    
    // cerr << "Adding kmer: " << kmer << endl;
    
    long pos = 0; // start with root node.


    for (unsigned int i = 0; i < kmer.length(); i++) {

        KtreeNode& node = ktree_node_list[pos];
                
        char c = kmer[i];
        // cerr << "k-char: " << c << " at pos: " << i << endl;
        // cerr << " corresponds to parent node: " << node.toString();
        
        if (node.has_child(c)) {
            
            pos = node.get_child(c);
            //cerr << "\thas child for: " << c << " at pos: " << pos << ".\n";
            
        }
        else {
            //cerr << "no child, adding it." << endl;
            KtreeNode newNode (c, 0);
            long orig_pos = pos;
            pos = add_node_to_list(newNode); // invalidates earlier node reference, so re-retrieve it below

            KtreeNode& node = ktree_node_list[orig_pos];
            node.add_child(c, pos);

            // cerr << "\tnode now described as: " << node.toString();
            // cerr << "\tOR: " << ktree_node_list[orig_pos].toString();
            
        }
        
        //cerr << "KmerTree:\n" << toString();
        
        //cerr << "reporting kmer counts:" << endl;
        //report_kmer_counts();
    }
    
    // node is now leaf node

    // update the leaf count.
    KtreeNode& node = ktree_node_list[pos];
    node.set_count( node.get_count() + 1);
    ktree_node_list[pos] = node;
    
    // cerr << "-kmer added.\n\n";

}

long Ktree::add_node_to_list(KtreeNode node) {

    // cerr << "Adding node to list." << endl;

    ktree_node_counter++;
    ktree_node_list.push_back(node);
    
    if (ktree_node_list.size() - 1 != ktree_node_counter) {
        throw ("Error, node list and last index are out of synch");
    }

    // cerr << "done.\n";
    return(ktree_node_counter);
    
}

void Ktree::report_kmer_counts() {
    
    KtreeNode& root = get_root_node();
    
    recurse_through_kmer_counts("", root);
}

void Ktree::recurse_through_kmer_counts(string prefix, KtreeNode& node) {

    if (node.has_children()) {
        
        vector<char> children_chars = node.get_children();
        
        for (vector<char>::iterator it = children_chars.begin(); it != children_chars.end(); it++) {
            
            char c = *it;
            long child_id = node.get_child(c);
            KtreeNode& child = ktree_node_list[child_id];
            
            string prefix_extend = prefix + c;
            recurse_through_kmer_counts(prefix_extend, child);
        }
    }

    else {

        cout << prefix << "\t" << node.get_count() << endl;
    }

}

            
            
string Ktree::toString() {

    stringstream s;

    for (vector<KtreeNode>::iterator it = ktree_node_list.begin(); it != ktree_node_list.end(); it++) {
        
        KtreeNode& k = *it;
        
        s << k.toString();
    }

    return(s.str());
}