File: Fragment.cpp

package info (click to toggle)
mapsembler2 2.2.3%2Bdfsg-3
  • links: PTS, VCS
  • area: main
  • in suites: stretch
  • size: 8,080 kB
  • ctags: 9,676
  • sloc: cpp: 51,021; ansic: 13,465; sh: 460; makefile: 409; asm: 271; python: 28
file content (148 lines) | stat: -rw-r--r-- 5,678 bytes parent folder | download | duplicates (5)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
#include "Fragment.h"
#include "commons.h"
extern char comp['t'+1];

extern Bloom * bloo1;
extern BinaryBank * SolidKmers;
extern char * prefix_trashable;



Fragment::Fragment(string fragment_sequence) : fragment_sequence(string(fragment_sequence))
{}

void Fragment::set_comment(string fragment_comment){
    this->fragment_comment=string(fragment_comment);}



Fragment::~Fragment(){
    //TODO: free the extreme kmers and the sequence and the comment
}

void Fragment::add_left_extreme_kmer (string left_extrem_kmer){
    left_extrem_kmers.push_back(left_extrem_kmer);
}
void Fragment::add_right_extreme_kmer (string right_extrem_kmer){
    right_extrem_kmers.push_back(right_extrem_kmer);
}


void Fragment::perform_extension(const string prefix, int max_graph_depth, const char extension_type, int max_nodes, parcours_t search_mode){
    terminator->reset(); // distinct extensions may share kmers, however, a unique extension doesn't.
    if(extension_type==1 || extension_type==2)
        IterativeExtensions::when_to_stop_extending = IterativeExtensions::After_first_contig; // sequence
    else
        IterativeExtensions::when_to_stop_extending = IterativeExtensions::Until_max_depth; //graph
    
    if(extension_type==1 || extension_type==3)
        IterativeExtensions::traversal_type = IterativeExtensions::SimplePaths; // strict
    else
        IterativeExtensions::traversal_type = IterativeExtensions::Monument; // contigs
    
    // construct and store the linear seqs'
    IterativeExtensions::dont_output_first_nucleotide = true;
    
    
    for (vector<string>::iterator it=left_extrem_kmers.begin(); it!= left_extrem_kmers.end(); ++it){
        string linear_seq_name = prefix_trashable+(*it);
//        printf("%s\n", linear_seq_name.c_str());
        IterativeExtensions::construct_linear_seqs(*it,string(), max_graph_depth, linear_seq_name, 1, max_nodes,  search_mode);
    }
    
    
    for (vector<string>::iterator it=right_extrem_kmers.begin(); it!= right_extrem_kmers.end(); ++it){
        string linear_seq_name = prefix_trashable+(*it);
//        printf("%s\n", linear_seq_name.c_str());
        IterativeExtensions::construct_linear_seqs(*it,string(), max_graph_depth, linear_seq_name, 1, max_nodes,  search_mode);
    }

    
    
}

bool Fragment::is_left_kmer (const int left_extrem_kmer_id){
    char * kmer_char = (char*)left_extrem_kmers[left_extrem_kmer_id].c_str();
    const int k=strlen(kmer_char);
    revcomp (kmer_char, k);

    bool return_value=true;
    for(int i=0;i<k;i++)
        if(kmer_char[i] != fragment_sequence[i]) {return_value=false; break;}
//    printf("LEFT %s %s %s\n",return_value?"true":"false", kmer_char, fragment_sequence.c_str());
    revcomp (kmer_char, k);
    return return_value;
}

bool Fragment::is_right_kmer (const int right_extrem_kmer_id){
    const int k=right_extrem_kmers[right_extrem_kmer_id].size();
    const int start_on_starter=fragment_sequence.size()-k;
    bool return_value=true;
    for(int i=0;i<k;i++)
        if(right_extrem_kmers[right_extrem_kmer_id][i] != fragment_sequence[start_on_starter+i])  {return_value=false; break;}
//    printf("RIGHT %s %s %s\n",return_value?"true":"false", right_extrem_kmers[right_extrem_kmer_id].c_str(), fragment_sequence.c_str());
    return return_value;
}

void Fragment::fragment_output_graph(GraphOutput graph){
    
    for(int i=0; i<left_extrem_kmers.size();i++){
        if (is_left_kmer(i)) graph.original=true;
        else graph.original=false;
        graph.load_nodes_extremities((char *)(prefix_trashable+left_extrem_kmers[i]).c_str());
        id_els first_id_els=graph.construct_graph(prefix_trashable+left_extrem_kmers[i], "LEFT");
        graph.first_id_els=first_id_els;
    }
    
    
    for(int i=0; i<right_extrem_kmers.size();i++){
        if (is_right_kmer(i)) graph.original=true;
        else graph.original=false;
        graph.load_nodes_extremities((char *)(prefix_trashable+right_extrem_kmers[i]).c_str());
        id_els first_id_els=graph.construct_graph(prefix_trashable+right_extrem_kmers[i], "RIGHT");
        graph.first_id_els=first_id_els;
    }
    
}





void Fragment::fragment_output_sequence(int index, string output_res, bool erase){
    char * rseq = (char *)"";
    char * lseq = (char *)"";
    int readlen;
    FILE * output_res_file = fopen((char *) output_res.c_str(), erase?"w":"a");
    
    fprintf(output_res_file, ">%s\n%s\n", fragment_comment.c_str(), fragment_sequence.c_str());
    
    //LEFT kmers
    for(int i=0; i<left_extrem_kmers.size();i++){
        string left_kmer =left_extrem_kmers[i];
        // get the sequence of the left extension
        Bank * Left = new Bank((char*)(prefix_trashable+left_kmer).c_str());
        // ! we should have a unique sequence in this file !
        //        Left->reset_max_readlen(1000000);
        assert(Left->get_next_seq(&lseq,&readlen));
      
        // We should revese complement the left sequence
        revcomp(lseq,readlen);
        fprintf(output_res_file, ">left_extension_%d\n%s\n", i, lseq);
        Left->close();
        delete Left;
    }
    //RIGHT kmers
    for(int i=0; i<right_extrem_kmers.size();i++){
        string right_kmer =right_extrem_kmers[i];
        // get the sequence of the right extension
        Bank * Right = new Bank((char*)(prefix_trashable+right_kmer).c_str());
        // ! we should have a unique sequence in this file !
        //        Left->reset_max_readlen(1000000);
        assert(Right->get_next_seq(&rseq,&readlen));
        fprintf(output_res_file, ">right_extension_%d\n%s\n", i, rseq);
        Right->close();
        delete Right;
    }
    fclose(output_res_file); 
}