File: NGraph.h

package info (click to toggle)
kissplice 2.6.7-2
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 16,752 kB
  • sloc: cpp: 8,783; python: 1,618; perl: 389; sh: 72; makefile: 18
file content (325 lines) | stat: -rw-r--r-- 12,191 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
/* ***************************************************************************
 *
 *                              KisSplice
 *      de-novo calling alternative splicing events from RNA-seq data.
 *
 * ***************************************************************************
 *
 * Copyright INRIA 
 *  contributors :  Vincent Lacroix
 *                  Pierre Peterlongo
 *                  Gustavo Sacomoto
 *                  Vincent Miele
 *                  Alice Julien-Laferriere
 *                  David Parsons
 *
 * pierre.peterlongo@inria.fr
 * vincent.lacroix@univ-lyon1.fr
 *
 * This software is a computer program whose purpose is to detect alternative
 * splicing events from RNA-seq data.
 *
 * This software is governed by the CeCILL license under French law and
 * abiding by the rules of distribution of free software. You can  use,
 * modify and/ or redistribute the software under the terms of the CeCILL
 * license as circulated by CEA, CNRS and INRIA at the following URL
 * "http://www.cecill.info".

 * As a counterpart to the access to the source code and  rights to copy,
 * modify and redistribute granted by the license, users are provided only
 * with a limited warranty  and the software's author,  the holder of the
 * economic rights,  and the successive licensors  have only  limited
 * liability.

 * In this respect, the user's attention is drawn to the risks associated
 * with loading,  using,  modifying and/or developing or reproducing the
 * software by the user in light of its specific status of free software,
 * that may mean  that it is complicated to manipulate,  and  that  also
 * therefore means  that it is reserved for developers  and  experienced
 * professionals having in-depth computer knowledge. Users are therefore
 * encouraged to load and test the software's suitability as regards their
 * requirements in conditions enabling the security of their systems and/or
 * data to be ensured and,  more generally, to use and operate it in the
 * same conditions as regards security.
 *
 * The fact that you are presently reading this means that you have had
 * knowledge of the CeCILL license and that you accept its terms.
 */




#ifndef NGRAPH_H
#define NGRAPH_H


// ===========================================================================
//                               Include Libraries
// ===========================================================================
#include <stdlib.h>
#include <stdio.h>
#include <vector>
#include <map>
#include <list>
#include <string>
#include <utility>



// ===========================================================================
//                             Include Project Files
// ===========================================================================
#include "CGraph.h"
#include "NNode.h"
#include "NEdge.h"




// ===========================================================================
//                              Class declarations
// ===========================================================================




// ===========================================================================
//                             Declare Used Namespaces
// ===========================================================================
using namespace std;



typedef pair<int,char> idx_dir;

typedef list<pair<int,char> > path_t;
typedef pair<int,char> idx_dir;

//! A class for the representation of De-Bruijn graphs
/*!
 * Graph class containing a lot of informations about the De-Bruijn graph.
 *  Used after the decomposition in biconnected-component. Each BCC is a NGraph object to be
 * analyzed
 */
class NGraph
{
  public:
    
    // =======================================================================
    //                                 Enums
    // =======================================================================
    
    // =======================================================================
    //                               Constructors
    // =======================================================================
    NGraph( int kValue, int outputtedSnps );
    NGraph( const NGraph & model ) = default;
    NGraph( CGraph & g, vector<char*> & seqs, vector<LabelledCEdge> & all_edges, vector<CEdge> & edges );
    ~NGraph() = default;

    // =======================================================================
    //                            Accessors: getters
    // =======================================================================
    inline int getKValue( void ) const;
    inline const string& getSequence( int node_id ) const;
    //~ inline const vector<string>& getSequences( void ) const;
    inline const list<NEdge>& getAdjList( int node_id ) const;
    inline const vector<NNode>& getNodes( void ) const;
    inline int getNbNodes( void ) const;
    inline int getNbOutput( void ) const;
    
    int get_in_degree( int node, char dir ) const;
    int get_out_degree( int node, char dir ) const;

    // =======================================================================
    //                            Accessors: setters
    // =======================================================================
    inline void incrementNbOutput( void );// can only be incremented one per one
    // =======================================================================
    //                                Operators
    // =======================================================================

     // =======================================================================
     //                              Public Methods
     // =======================================================================
    //! Insertion of a new edge
    /*!
     * \brief Insert a new edge from u to v with the corresponding label.
     *
     * Labels are {FF, RR, FR, RF}. It indicates which sequences of the two nodes overlap.
     * If the edge already exists with a different label, it concatenates the new label to the previous one.
     * If the edge already exists with the same label, do nothing.
     *
     * \param u  the tail of the edge
     * \param v	the head of the edge
     * \param label the label of the edge
     */
    void insert_edge( string u, string v, string label );
    
    
    //! Insertion of a new node
    /*!
     * \brief Insert a new node v
     * \param v a string  labelling the node
     * \param seq the sequence of  the node
     */
    void insert_node( string v, string seq );
    //! Insert an empty node
    /*
     * \param u : the label for the empty node
     */
    void insert_empty_node( string u );
    
    void insert_bidirected_edges( vector<LabelledCEdge>& all_edges, CEdge e );
    void expand_parallel_edges( void );


    //================= Bubble compression (from cycle compression)
    void compress_all_bubbles( int *compressed_bubbles, FILE * snp_log_file, const int bccid, const bool output_context);
    // Auxiliary functions
    void find_and_compress_bubble( int u, char dir, bool *removed, FILE * snp_log_file, const int bccid, const bool output_context);
    void fix_bubble_neighborhood( NNode& new_node, int new_idx, char new_dir, idx_dir open, idx_dir u, idx_dir l, bool *removed );
    idx_dir close_bubble( idx_dir open, idx_dir upper, idx_dir lower ) const;
    bool follow_path( idx_dir prev, idx_dir curr, idx_dir &next ) const ;
    string merge_nodes( char dir, idx_dir u, idx_dir l );
    void output_bubble( char dir, idx_dir u, idx_dir l, idx_dir open, idx_dir close, FILE *snp_log_file, const int bccid, const bool output_context);


    // ============== Path compression methods
    // Compress all linear path and output the graph to the files.
    void compress_all_paths( void );
    void compress_path( list<pair<int,char> > path, bool removed[]);
    path_t find_maximal_linear_path( int start, char dir ) const;
    int next_node( int start, char dir, char *next_dir ) const;
    string get_path_sequence( path_t path );

    void fix_neighborhood( NNode& new_node, int new_idx, int node, char dir, bool removed[]);


    // ================ Writing methods
    void print_graph_edges( FILE *stream, string (*node_label)(int),
        bool *filter, bool value ) const;
    void print_graph_nodes( FILE *stream, string (*node_label)(int),
        bool *filter, bool value ) const;
    void print_graph_edges_new( int *lines_written_edges, FILE *stream1, FILE *stream, string (*node_label)(int),
        bool *filter, bool value ) const;
    void print_graph_nodes_new( int *lines_written_nodes, FILE *stream1, FILE *stream, string (*node_label)(int),
        bool *filter, bool value ) const;
        
        
        
    list<NEdge>::iterator eraseAdjList( int node_id, int w );
    
    
    inline const string node_to_str( int node_id ) const;


    // =======================================================================
    //                             Public Attributes
    // =======================================================================



  protected :

    // =======================================================================
    //                            Forbidden Constructors
    // =======================================================================
    NGraph() = delete;

    // =======================================================================
    //                              Protected Methods
    // =======================================================================

    // =======================================================================
    //                             Protected Attributes
    // =======================================================================
    
    //! _kValue used for the graph construction
    int _kValue;
    //! Number of merged sequences, SNPs or sequencing errors merged with an N
    int _nbOutput;
    //! The nodes
    vector<NNode> _nodes;

    //! Hash to/from node_ids from/to node_labels
    vector<string> _nodeToStr;
    map<string, int> _strToNode;
};


// ===========================================================================
//                              Getters' definitions
// ===========================================================================
int NGraph::getKValue( void ) const
{
  return _kValue;
}

const string& NGraph::getSequence( int node_id ) const
{
  return _nodes[node_id].getSequence();
}

//~ const vector<string>& NGraph::getSequences( void ) const
//~ {
  //~ return _nodeSequences;
//~ }

const list<NEdge>& NGraph::getAdjList( int node_id ) const
{
  return _nodes[node_id].getAdjList();
}

const vector<NNode>& NGraph::getNodes( void ) const
{
  return _nodes;
}

int NGraph::getNbNodes( void ) const
{
  return _nodes.size();
}

int NGraph::getNbOutput( void ) const
{
 return _nbOutput;
}

void NGraph::incrementNbOutput( void )
{
  _nbOutput += 1;
}
// ===========================================================================
//                              Setters' definitions
// ===========================================================================


// ===========================================================================
//                          Inline Operators' definitions
// ===========================================================================

// ===========================================================================
//                          Inline functions' definition
// ===========================================================================
const string NGraph::node_to_str( int node_id ) const
{
  return _nodeToStr[node_id];
}



//============== Reading methods
void read_graph_edges( NGraph &g, FILE *edge_file );
void read_graph_nodes( NGraph &g, FILE *node_file );

void read_graph_edges_new( NGraph &g, FILE *info_file,  FILE *file_contents, FILE *data, int *required_sequence, int *file_index );
void read_graph_nodes_new( NGraph &g, FILE *info_file,  FILE *file_contents, FILE *data, int *required_sequence, int *file_index );


NGraph remove_marked_nodes(NGraph &g, bool *filter, bool value);

// Only use in DEBUG
void print_path(list<pair<int,char> > path);
#endif /* NGRAPH_H */