File: SplitBcc.cpp

package info (click to toggle)
kissplice 2.6.7-2
links: PTS, VCS
area: main
in suites: forky, sid, trixie
size: 16,752 kB
sloc: cpp: 8,783; python: 1,618; perl: 389; sh: 72; makefile: 18
file content (210 lines) | stat: -rw-r--r-- 7,824 bytes
parent folder | download | duplicates (2)
/* ***************************************************************************
 *
 *                              KisSplice
 *      de-novo calling alternative splicing events from RNA-seq data.
 *
 * ***************************************************************************
 *
 * Copyright INRIA
 *  contributors :  Vincent Lacroix
 *                  Pierre Peterlongo
 *                  Gustavo Sacomoto
 *                  Vincent Miele
 *                  Alice Julien-Laferriere
 *                  David Parsons
 *
 * pierre.peterlongo@inria.fr
 * vincent.lacroix@univ-lyon1.fr
 *
 * This software is a computer program whose purpose is to detect alternative
 * splicing events from RNA-seq data.
 *
 * This software is governed by the CeCILL license under French law and
 * abiding by the rules of distribution of free software. You can  use,
 * modify and/ or redistribute the software under the terms of the CeCILL
 * license as circulated by CEA, CNRS and INRIA at the following URL
 * "http://www.cecill.info".

 * As a counterpart to the access to the source code and  rights to copy,
 * modify and redistribute granted by the license, users are provided only
 * with a limited warranty  and the software's author,  the holder of the
 * economic rights,  and the successive licensors  have only  limited
 * liability.

 * In this respect, the user's attention is drawn to the risks associated
 * with loading,  using,  modifying and/or developing or reproducing the
 * software by the user in light of its specific status of free software,
 * that may mean  that it is complicated to manipulate,  and  that  also
 * therefore means  that it is reserved for developers  and  experienced
 * professionals having in-depth computer knowledge. Users are therefore
 * encouraged to load and test the software's suitability as regards their
 * requirements in conditions enabling the security of their systems and/or
 * data to be ensured and,  more generally, to use and operate it in the
 * same conditions as regards security.
 *
 * The fact that you are presently reading this means that you have had
 * knowledge of the CeCILL license and that you accept its terms.
 */


// ===========================================================================
//                               Include Libraries
// ===========================================================================
#include <stdio.h>
#include <list>
#include <vector>
#include <stack>

// ===========================================================================
//                             Include Project Files
// ===========================================================================
#include "CGraph.h"
#include "CEdge.h"
#include "SplitBcc.h"

// ===========================================================================
//                             Declare Used Namespaces
// ===========================================================================
using namespace std;

//================================ Methods for finding all the BCCs of the graph
/*!
 \brief Extracts the newly found bcc.
 \param g the graph we are working on
 \param edge the edge at which the bcc extraction stops
 \param backtrace_lifo the stack of the already visited edges (last-in first-out)
 \param g_bcc a vector of the bccs found so far

 At the end of output_component, g_bcc will hold a new bcc, unless the
 component consists of the edge edge alone (2 nodes): in that case the edge
 is simply removed from the stack and nothing is stored.
 edge is the reference edge: as it was encountered twice, it defines the bcc.
 output_component extracts all the edges of the stack backtrace_lifo,
 from the top down to (and including) the edge edge.
 */

void output_component( CGraph &g, CEdge edge, stack<CEdge>& backtrace_lifo,
		       vector< vector<CEdge> >& g_bcc )
{
  // NOTE: g is not used in this function.

  // When the reference edge (in either orientation) is already on top of the
  // stack, the component is that single edge, i.e. only 2 nodes. Such a
  // component cannot contain a bubble: drop the edge and store nothing.
  const bool single_edge = ( edge == backtrace_lifo.top() )
                        || ( edge.swap_ends() == backtrace_lifo.top() );
  if ( single_edge )
  {
    backtrace_lifo.pop();
    return;
  }

  // Unstack edges one by one, copying each into the component, until the
  // reference edge (in either orientation) has itself been unstacked.
  vector<CEdge> component;
  bool reached_reference = false;
  while ( !reached_reference )
  {
    CEdge current = backtrace_lifo.top();
    backtrace_lifo.pop();

    int from = current.getFirst();
    int to   = current.getSecond();
    component.push_back( CEdge( from, to ) );

    reached_reference = ( edge == current ) || ( edge.swap_ends() == current );
  }

  g_bcc.push_back( component );
}


/*!
  \brief Depth-first search algorithm (dfs) on the CGraph object, starting from the node node_id

  \param g The graph (compressed) we are working on
  \param node_id The index of the node we are visiting
  \param backtrace_lifo the stack of the already visited edges
  \param g_bcc a vector of the bccs found so far
  \param visited an array of booleans, tells whether a node was already visited by
  the depth-first search algorithm (default FALSE)
  \param parent an array, the node from which the dfs algorithm arrived at the
   current node, default -1.
  \param dfs_tree_node_id an array, the number given to each node when it is
  reached during the dfs (depth of the node)
  \param low_point an array, the low point value for each node
  \param node_id_counter the number given to the calling node (0 for the initial
  call). It is passed by value: the current node is numbered node_id_counter + 1,
  so the number of a node is its depth in the dfs tree.

  The depth-first search algorithm recursively visits all the nodes accessible
  from node_id (i.e. all its successors).
  The nodes are numbered with dfs_tree_node_id (depth) as they are reached
  during the search.
  The low_point value is the lowest depth of the neighbors of all descendants of
  the node in the depth-first-search tree.
*/
void dfs_visit(CGraph &g, int node_id, stack<CEdge>& backtrace_lifo, vector<vector<CEdge> >& g_bcc, bool* &visited, int* &parent, int* &dfs_tree_node_id, int* &low_point, int node_id_counter)
{
  // Number the node: the counter is a by-value copy, so incrementing it here
  // gives this node its caller's number plus one. The low-point starts at the
  // node's own number.
  ++node_id_counter;
  visited[node_id]          = true;
  dfs_tree_node_id[node_id] = node_id_counter;
  low_point[node_id]        = node_id_counter;

  int *neighbours = g.get_adj_list(node_id);

  for ( int idx = 0; idx < g.get_adj_list_sz(node_id); ++idx )
  {
    int next = neighbours[idx];

    if ( visited[next] )
    {
      // Already visited neighbour: the edge is recorded only when next is not
      // the node we came from and has a strictly smaller number than node_id.
      // The number test fails, for instance, when next == node_id.
      // Otherwise there is nothing to do (we would be following the previous
      // edge backwards).
      const bool came_from_next = ( parent[node_id] == next );
      if ( !came_from_next && dfs_tree_node_id[next] < dfs_tree_node_id[node_id] )
      {
        backtrace_lifo.push( CEdge( node_id, next ) );
        if ( dfs_tree_node_id[next] < low_point[node_id] )
        {
          low_point[node_id] = dfs_tree_node_id[next];
        }
      }
      continue;
    }

    // Unvisited neighbour: stack the edge, then explore below it.
    backtrace_lifo.push( CEdge( node_id, next ) );
    parent[next] = node_id;
    dfs_visit( g, next, backtrace_lifo, g_bcc, visited, parent, dfs_tree_node_id, low_point, node_id_counter );

    // Nothing explored below next reaches a node numbered lower than node_id:
    // node_id is an articulation point, and the edges stacked since
    // (node_id, next) form a bcc, which output_component extracts.
    if ( low_point[next] >= dfs_tree_node_id[node_id] )
    {
      output_component( g, CEdge( node_id, next ), backtrace_lifo, g_bcc );
    }

    // Propagate the child's low-point upwards.
    if ( low_point[next] < low_point[node_id] )
    {
      low_point[node_id] = low_point[next];
    }
  }
}

/*!
 \brief Extracts the bi-connected components of the CGraph object.
 Calls the dfs method on all the unvisited nodes of the CGraph object.
 \param g the graph we are working on
 \return a vector of bccs, each bcc being the vector of its edges
*/
vector<vector<CEdge> > find_bcc(CGraph &g)
{
  const int nb_nodes = g.get_n_nodes();

  // Working data shared by every call to the recursive dfs_visit
  stack<CEdge> backtrace_lifo;
  vector<vector<CEdge> > g_bcc;

  // Per-node arrays. The trailing () value-initializes visited, so every
  // entry starts out false. dfs_tree_node_id and low_point are left
  // uninitialized: dfs_visit writes a node's entries when it visits it.
  bool *visited          = new bool[nb_nodes]();
  int  *parent           = new int[nb_nodes];
  int  *dfs_tree_node_id = new int[nb_nodes];
  int  *low_point        = new int[nb_nodes];

  for ( int node = 0; node < nb_nodes; ++node )
  {
    parent[node] = -1; // no parent yet
  }

  // dfs_visit takes the counter by value, so each search starts from 0.
  const int node_id_counter = 0;

  // Start a new search from every node not reached by a previous one.
  for ( int root = 0; root < nb_nodes; ++root )
  {
    if ( visited[root] )
    {
      continue;
    }
    dfs_visit( g, root, backtrace_lifo, g_bcc, visited, parent, dfs_tree_node_id, low_point, node_id_counter );
  }

  delete[] low_point;
  delete[] dfs_tree_node_id;
  delete[] parent;
  delete[] visited;

  return g_bcc;
}