1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148
|
#include "Fragment.h"
#include "commons.h"
extern char comp['t'+1];
extern Bloom * bloo1;
extern BinaryBank * SolidKmers;
extern char * prefix_trashable;
Fragment::Fragment(string fragment_sequence) : fragment_sequence(string(fragment_sequence))
{}
void Fragment::set_comment(string fragment_comment){
this->fragment_comment=string(fragment_comment);}
Fragment::~Fragment(){
//TODO: free the extreme kmers and the sequence and the comment
}
void Fragment::add_left_extreme_kmer (string left_extrem_kmer){
left_extrem_kmers.push_back(left_extrem_kmer);
}
void Fragment::add_right_extreme_kmer (string right_extrem_kmer){
right_extrem_kmers.push_back(right_extrem_kmer);
}
void Fragment::perform_extension(const string prefix, int max_graph_depth, const char extension_type, int max_nodes, parcours_t search_mode){
terminator->reset(); // distinct extensions may share kmers, however, a unique extension doesn't.
if(extension_type==1 || extension_type==2)
IterativeExtensions::when_to_stop_extending = IterativeExtensions::After_first_contig; // sequence
else
IterativeExtensions::when_to_stop_extending = IterativeExtensions::Until_max_depth; //graph
if(extension_type==1 || extension_type==3)
IterativeExtensions::traversal_type = IterativeExtensions::SimplePaths; // strict
else
IterativeExtensions::traversal_type = IterativeExtensions::Monument; // contigs
// construct and store the linear seqs'
IterativeExtensions::dont_output_first_nucleotide = true;
for (vector<string>::iterator it=left_extrem_kmers.begin(); it!= left_extrem_kmers.end(); ++it){
string linear_seq_name = prefix_trashable+(*it);
// printf("%s\n", linear_seq_name.c_str());
IterativeExtensions::construct_linear_seqs(*it,string(), max_graph_depth, linear_seq_name, 1, max_nodes, search_mode);
}
for (vector<string>::iterator it=right_extrem_kmers.begin(); it!= right_extrem_kmers.end(); ++it){
string linear_seq_name = prefix_trashable+(*it);
// printf("%s\n", linear_seq_name.c_str());
IterativeExtensions::construct_linear_seqs(*it,string(), max_graph_depth, linear_seq_name, 1, max_nodes, search_mode);
}
}
bool Fragment::is_left_kmer (const int left_extrem_kmer_id){
char * kmer_char = (char*)left_extrem_kmers[left_extrem_kmer_id].c_str();
const int k=strlen(kmer_char);
revcomp (kmer_char, k);
bool return_value=true;
for(int i=0;i<k;i++)
if(kmer_char[i] != fragment_sequence[i]) {return_value=false; break;}
// printf("LEFT %s %s %s\n",return_value?"true":"false", kmer_char, fragment_sequence.c_str());
revcomp (kmer_char, k);
return return_value;
}
bool Fragment::is_right_kmer (const int right_extrem_kmer_id){
const int k=right_extrem_kmers[right_extrem_kmer_id].size();
const int start_on_starter=fragment_sequence.size()-k;
bool return_value=true;
for(int i=0;i<k;i++)
if(right_extrem_kmers[right_extrem_kmer_id][i] != fragment_sequence[start_on_starter+i]) {return_value=false; break;}
// printf("RIGHT %s %s %s\n",return_value?"true":"false", right_extrem_kmers[right_extrem_kmer_id].c_str(), fragment_sequence.c_str());
return return_value;
}
void Fragment::fragment_output_graph(GraphOutput graph){
for(int i=0; i<left_extrem_kmers.size();i++){
if (is_left_kmer(i)) graph.original=true;
else graph.original=false;
graph.load_nodes_extremities((char *)(prefix_trashable+left_extrem_kmers[i]).c_str());
id_els first_id_els=graph.construct_graph(prefix_trashable+left_extrem_kmers[i], "LEFT");
graph.first_id_els=first_id_els;
}
for(int i=0; i<right_extrem_kmers.size();i++){
if (is_right_kmer(i)) graph.original=true;
else graph.original=false;
graph.load_nodes_extremities((char *)(prefix_trashable+right_extrem_kmers[i]).c_str());
id_els first_id_els=graph.construct_graph(prefix_trashable+right_extrem_kmers[i], "RIGHT");
graph.first_id_els=first_id_els;
}
}
void Fragment::fragment_output_sequence(int index, string output_res, bool erase){
char * rseq = (char *)"";
char * lseq = (char *)"";
int readlen;
FILE * output_res_file = fopen((char *) output_res.c_str(), erase?"w":"a");
fprintf(output_res_file, ">%s\n%s\n", fragment_comment.c_str(), fragment_sequence.c_str());
//LEFT kmers
for(int i=0; i<left_extrem_kmers.size();i++){
string left_kmer =left_extrem_kmers[i];
// get the sequence of the left extension
Bank * Left = new Bank((char*)(prefix_trashable+left_kmer).c_str());
// ! we should have a unique sequence in this file !
// Left->reset_max_readlen(1000000);
assert(Left->get_next_seq(&lseq,&readlen));
// We should revese complement the left sequence
revcomp(lseq,readlen);
fprintf(output_res_file, ">left_extension_%d\n%s\n", i, lseq);
Left->close();
delete Left;
}
//RIGHT kmers
for(int i=0; i<right_extrem_kmers.size();i++){
string right_kmer =right_extrem_kmers[i];
// get the sequence of the right extension
Bank * Right = new Bank((char*)(prefix_trashable+right_kmer).c_str());
// ! we should have a unique sequence in this file !
// Left->reset_max_readlen(1000000);
assert(Right->get_next_seq(&rseq,&readlen));
fprintf(output_res_file, ">right_extension_%d\n%s\n", i, rseq);
Right->close();
delete Right;
}
fclose(output_res_file);
}
|