File: DeBruijnGraph.hpp

package info (click to toggle)
trinityrnaseq 2.11.0%2Bdfsg-6
  • links: PTS, VCS
  • area: main
  • in suites: bookworm, bullseye, sid
  • size: 417,528 kB
  • sloc: perl: 48,420; cpp: 17,749; java: 12,695; python: 3,124; sh: 1,030; ansic: 983; makefile: 688; xml: 62
file content (100 lines) | stat: -rw-r--r-- 2,815 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
#pragma once

#include <map>
#include <string>
#include <vector>
#include "sequenceUtil.hpp"




class DeBruijnKmer {
    
public:

    DeBruijnKmer(kmer_int_type_t k, long long kmer_id); // kmer_id is just a unique identifier for that node, having nothing inherrently to do w/ sequence info itself.

    //DeBruijnKmer(const DeBruijnKmer& dk);    # use default copy constructor
    
    long long getID() const;
    kmer_int_type_t get_kmer_int_val() const;
    
    vector<kmer_int_type_t> get_prev_kmers(unsigned int kmer_length);
    vector<kmer_int_type_t> get_next_kmers(unsigned int kmer_length);
    
    void add_prev_kmer(kmer_int_type_t k, unsigned int kmer_length);
    void add_next_kmer(kmer_int_type_t k);

    string toString(int kmer_length);
    
    unsigned int increment_kmer_count(unsigned int kmer_count);
    unsigned int get_kmer_count() const;

    void add_kmer_annotation(string annotation);
    vector<string> get_kmer_annotations();
    
    long long id;
    kmer_int_type_t _kmer ;
    unsigned int _kmer_count;
    vector<string> _annotations;
    
    
    char _prev; // bit array GATC indicating prev kmers
    char _next; // ditto for next kmers
    
    static const char _G_mask; // = 8;
    static const char _A_mask; // = 4;
    static const char _T_mask; // = 2;
    static const char _C_mask; // = 1;
    
    string get_annotations_string();
    
    
};


typedef map<kmer_int_type_t,DeBruijnKmer> DeBruijnKmerMap;


class DeBruijnGraph {

public:

    DeBruijnGraph(unsigned int kmer_length);

    void add_sequence(const string& accession, const string& sequence, bool sStrand, unsigned int cov_val);
    
    DeBruijnKmer& get_kmer_node(kmer_int_type_t t);    
    
    string toString();
    string toDOT(bool sStrand);    
    string toChrysalisFormat(int component_id, bool sStrand);
    
    vector<DeBruijnKmer> get_root_kmers(bool sStrand);
    
    bool kmerExists(kmer_int_type_t kval);
    
    vector<string> get_candidate_weldmers(kmer_int_type_t kmer_val, int weldmer_length);
    
    unsigned int get_kmer_length();
    

private:

    unsigned int _kmer_length;
    long long _kmer_id_counter;


    map<kmer_int_type_t,DeBruijnKmer> _kmer_map;
    
        
    void recursively_construct_kmer_extensions(kmer_int_type_t seed_kmer_val, 
                                               vector<char>& kmer_extension_chars, 
                                               vector<string>& extension_kmer_strings, 
                                               char direction, // L | R  fg left or right from the seed.
                                               map<kmer_int_type_t,bool>& kmer_seen,
                                               int flank_extension_length);            
    
    
    
    string get_kmer_from_char_vector(vector<char> char_vec, char direction);
    
};