3
y^)                 @   s   d dl mZmZmZmZ d dlZd dlZd dlZd dlZd dl	Z	d dl
Z
d dlZd dlZd dlZd dlZejeZG dd dZdS )    )absolute_importdivisionprint_functionunicode_literalsNc               @   s<   e Zd ZdZdd Zdd Zdd Zdd	 Zed
d Z	dS )	Node_pathz
    Object representation of the connected set of Node objects that represent the reconstructed isoforms graph traversal

    Instance members:

        transcript_name : (str)  name of the isoform

        node_obj_list : (list) of Node objects

    c             C   s   || _ t | _tjd|}d}t }xn|D ]f}|jd\}	}
|
jd\}}t|}t|}|sjd}|	d7 }	|j||	|||d  }| jj| q*W dS )	zV
        constructor, instantiates Node_path and builds vertices in the graph
        z\d+:\d+\-\d+F:-Tfst   N)	transcript_namelistnode_obj_listrefindallsplitintget_nodeappend)selftgraphr   Zpath_stringsequenceZnode_descr_listZfirst_kmer_flagZobj_node_listZ
node_descrloc_node_idZnode_coord_rangeZlendZrendnode_obj r   M/broad/hptmp/bhaas/trinityrnaseq/Analysis/SuperTranscripts/pylib/Node_path.py__init__!   s     
zNode_path.__init__c             C   s   | j S )N)r   )r   r   r   r   get_transcript_name?   s    zNode_path.get_transcript_namec             C   s   | j S )N)r   )r   r   r   r   get_pathB   s    zNode_path.get_pathc             C   s2   t  }x| jD ]}|jt| qW dj|}|S )Nz--)r   r   r   strjoin)r   Znode_str_listnodepath_strr   r   r   __repr__F   s
    
zNode_path.__repr__c             C   s  t  }| j }xN|D ]F}|j }tjd|rtjdd|}| j|}|dk	r|j||f qW |sptj	d |S tj	dj
| t }t  }	x|D ]\}
}|
j }|j }|ddd }|ddd }tj||stdj
||t|t| }|dkr2||kstd	|j|
j  |g||
< |	j|
 q|d| }tj	d
j
||| |j|
j  |
|g||
< |
j| qW x|D ]x}|j }|d }||kr|| }t|dkr|d |d< n4t|dkr|d |d< |jd|d  ntdqW x|	D ]}| j| qW |S )aX  
        fst nodes will have an extra 5' sequence as compared to the corresponding non-fst nodes.

        If both the fst and non-fst version of the node exist, must modify the fst nodes so that
        they are separated from their 5' extension, and the core of the node (suffix) is shared.

        input: TGraph obj, list of node_path objects.

        The node_path objects are modified in-place as needed.
        A fst-node will be truncated to the unique prefix and the non-fst node will be integrated into the path.

        returns the node_path_list with any required adjustments

        r	    Nzno FST nodes to adjustzAdjusting FST nodes: {}r
   z8Error, core_node_seq:
{}
is not a suffix of fst seq:
{}
r   zGError, prefix starts at first position but sequences are not equivalentz<FST-SEQ-EXTRACTION

FSTseq:
{}

COREseq:
{}

PREFIXseq:
{}

   zshouldn't get herer%   )r   get_all_nodes
get_loc_idr   searchsubretrieve_noder   loggerdebugformatdictget_seqmatchRuntimeErrorlenAssertionErroradd_transcriptsget_transcriptsset_seqr   insert
prune_node)r   Znode_path_listZfst_nodes_require_adjZnodesr    node_idZcore_node_idZ	core_nodeZold_fst_node_to_new_fst_nodesZfst_nodes_to_deleteZfst_nodeZfst_node_seqZcore_node_seqZfst_node_seq_revZcore_node_seq_revZprefix_endptZprefix_stringZ	node_pathZ
first_nodeZreplacement_node_listr   r   r   adjust_for_fst_nodesP   s^    







zNode_path.adjust_for_fst_nodesN)
__name__
__module____qualname____doc__r   r   r   r"   staticmethodr:   r   r   r   r   r      s   

r   )
__future__r   r   r   r   ossysr   loggingargparsecollectionsnumpytimeTNodeZTrinity_util	getLoggerr;   r+   r   r   r   r   r   <module>   s   
