1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325
|
/* ***************************************************************************
*
* KisSplice
* de-novo calling alternative splicing events from RNA-seq data.
*
* ***************************************************************************
*
* Copyright INRIA
* contributors : Vincent Lacroix
* Pierre Peterlongo
* Gustavo Sacomoto
* Vincent Miele
* Alice Julien-Laferriere
* David Parsons
*
* pierre.peterlongo@inria.fr
* vincent.lacroix@univ-lyon1.fr
*
* This software is a computer program whose purpose is to detect alternative
* splicing events from RNA-seq data.
*
* This software is governed by the CeCILL license under French law and
* abiding by the rules of distribution of free software. You can use,
* modify and/ or redistribute the software under the terms of the CeCILL
* license as circulated by CEA, CNRS and INRIA at the following URL
* "http://www.cecill.info".
* As a counterpart to the access to the source code and rights to copy,
* modify and redistribute granted by the license, users are provided only
* with a limited warranty and the software's author, the holder of the
* economic rights, and the successive licensors have only limited
* liability.
* In this respect, the user's attention is drawn to the risks associated
* with loading, using, modifying and/or developing or reproducing the
* software by the user in light of its specific status of free software,
* that may mean that it is complicated to manipulate, and that also
* therefore means that it is reserved for developers and experienced
* professionals having in-depth computer knowledge. Users are therefore
* encouraged to load and test the software's suitability as regards their
* requirements in conditions enabling the security of their systems and/or
* data to be ensured and, more generally, to use and operate it in the
* same conditions as regards security.
*
* The fact that you are presently reading this means that you have had
* knowledge of the CeCILL license and that you accept its terms.
*/
#ifndef NGRAPH_H
#define NGRAPH_H
// ===========================================================================
// Include Libraries
// ===========================================================================
#include <stdlib.h>
#include <stdio.h>
#include <vector>
#include <map>
#include <list>
#include <string>
#include <utility>
// ===========================================================================
// Include Project Files
// ===========================================================================
#include "CGraph.h"
#include "NNode.h"
#include "NEdge.h"
// ===========================================================================
// Class declarations
// ===========================================================================
// ===========================================================================
// Declare Used Namespaces
// ===========================================================================
using namespace std;
typedef pair<int,char> idx_dir;
typedef list<pair<int,char> > path_t;
typedef pair<int,char> idx_dir;
//! A class for the representation of De-Bruijn graphs
/*!
* Graph class containing a lot of informations about the De-Bruijn graph.
* Used after the decomposition in biconnected-component. Each BCC is a NGraph object to be
* analyzed
*/
class NGraph
{
public:
// =======================================================================
// Enums
// =======================================================================
// =======================================================================
// Constructors
// =======================================================================
NGraph( int kValue, int outputtedSnps );
NGraph( const NGraph & model ) = default;
NGraph( CGraph & g, vector<char*> & seqs, vector<LabelledCEdge> & all_edges, vector<CEdge> & edges );
~NGraph() = default;
// =======================================================================
// Accessors: getters
// =======================================================================
inline int getKValue( void ) const;
inline const string& getSequence( int node_id ) const;
//~ inline const vector<string>& getSequences( void ) const;
inline const list<NEdge>& getAdjList( int node_id ) const;
inline const vector<NNode>& getNodes( void ) const;
inline int getNbNodes( void ) const;
inline int getNbOutput( void ) const;
int get_in_degree( int node, char dir ) const;
int get_out_degree( int node, char dir ) const;
// =======================================================================
// Accessors: setters
// =======================================================================
inline void incrementNbOutput( void );// can only be incremented one per one
// =======================================================================
// Operators
// =======================================================================
// =======================================================================
// Public Methods
// =======================================================================
//! Insertion of a new edge
/*!
* \brief Insert a new edge from u to v with the corresponding label.
*
* Labels are {FF, RR, FR, RF}. It indicates which sequences of the two nodes overlap.
* If the edge already exists with a different label, it concatenates the new label to the previous one.
* If the edge already exists with the same label, do nothing.
*
* \param u the tail of the edge
* \param v the head of the edge
* \param label the label of the edge
*/
void insert_edge( string u, string v, string label );
//! Insertion of a new node
/*!
* \brief Insert a new node v
* \param v a string labelling the node
* \param seq the sequence of the node
*/
void insert_node( string v, string seq );
//! Insert an empty node
/*
* \param u : the label for the empty node
*/
void insert_empty_node( string u );
void insert_bidirected_edges( vector<LabelledCEdge>& all_edges, CEdge e );
void expand_parallel_edges( void );
//================= Bubble compression (from cycle compression)
void compress_all_bubbles( int *compressed_bubbles, FILE * snp_log_file, const int bccid, const bool output_context);
// Auxiliary functions
void find_and_compress_bubble( int u, char dir, bool *removed, FILE * snp_log_file, const int bccid, const bool output_context);
void fix_bubble_neighborhood( NNode& new_node, int new_idx, char new_dir, idx_dir open, idx_dir u, idx_dir l, bool *removed );
idx_dir close_bubble( idx_dir open, idx_dir upper, idx_dir lower ) const;
bool follow_path( idx_dir prev, idx_dir curr, idx_dir &next ) const ;
string merge_nodes( char dir, idx_dir u, idx_dir l );
void output_bubble( char dir, idx_dir u, idx_dir l, idx_dir open, idx_dir close, FILE *snp_log_file, const int bccid, const bool output_context);
// ============== Path compression methods
// Compress all linear path and output the graph to the files.
void compress_all_paths( void );
void compress_path( list<pair<int,char> > path, bool removed[]);
path_t find_maximal_linear_path( int start, char dir ) const;
int next_node( int start, char dir, char *next_dir ) const;
string get_path_sequence( path_t path );
void fix_neighborhood( NNode& new_node, int new_idx, int node, char dir, bool removed[]);
// ================ Writing methods
void print_graph_edges( FILE *stream, string (*node_label)(int),
bool *filter, bool value ) const;
void print_graph_nodes( FILE *stream, string (*node_label)(int),
bool *filter, bool value ) const;
void print_graph_edges_new( int *lines_written_edges, FILE *stream1, FILE *stream, string (*node_label)(int),
bool *filter, bool value ) const;
void print_graph_nodes_new( int *lines_written_nodes, FILE *stream1, FILE *stream, string (*node_label)(int),
bool *filter, bool value ) const;
list<NEdge>::iterator eraseAdjList( int node_id, int w );
inline const string node_to_str( int node_id ) const;
// =======================================================================
// Public Attributes
// =======================================================================
protected :
// =======================================================================
// Forbidden Constructors
// =======================================================================
NGraph() = delete;
// =======================================================================
// Protected Methods
// =======================================================================
// =======================================================================
// Protected Attributes
// =======================================================================
//! _kValue used for the graph construction
int _kValue;
//! Number of merged sequences, SNPs or sequencing errors merged with an N
int _nbOutput;
//! The nodes
vector<NNode> _nodes;
//! Hash to/from node_ids from/to node_labels
vector<string> _nodeToStr;
map<string, int> _strToNode;
};
// ===========================================================================
// Getters' definitions
// ===========================================================================
int NGraph::getKValue( void ) const
{
return _kValue;
}
const string& NGraph::getSequence( int node_id ) const
{
return _nodes[node_id].getSequence();
}
//~ const vector<string>& NGraph::getSequences( void ) const
//~ {
//~ return _nodeSequences;
//~ }
const list<NEdge>& NGraph::getAdjList( int node_id ) const
{
return _nodes[node_id].getAdjList();
}
const vector<NNode>& NGraph::getNodes( void ) const
{
return _nodes;
}
int NGraph::getNbNodes( void ) const
{
return _nodes.size();
}
int NGraph::getNbOutput( void ) const
{
return _nbOutput;
}
void NGraph::incrementNbOutput( void )
{
_nbOutput += 1;
}
// ===========================================================================
// Setters' definitions
// ===========================================================================
// ===========================================================================
// Inline Operators' definitions
// ===========================================================================
// ===========================================================================
// Inline functions' definition
// ===========================================================================
const string NGraph::node_to_str( int node_id ) const
{
return _nodeToStr[node_id];
}
//============== Reading methods
void read_graph_edges( NGraph &g, FILE *edge_file );
void read_graph_nodes( NGraph &g, FILE *node_file );
void read_graph_edges_new( NGraph &g, FILE *info_file, FILE *file_contents, FILE *data, int *required_sequence, int *file_index );
void read_graph_nodes_new( NGraph &g, FILE *info_file, FILE *file_contents, FILE *data, int *required_sequence, int *file_index );
NGraph remove_marked_nodes(NGraph &g, bool *filter, bool value);
// Only use in DEBUG
void print_path(list<pair<int,char> > path);
#endif /* NGRAPH_H */
|