3
y^                 @   s   d dl mZmZmZmZ d dlZd dlZd dlZd dlZd dl	Z	d dl
Z
d dlZd dlZd dlmZ d dlmZ d dlmZ d dlZejeZdZdd	d
Zdd Zdd Zdd Zdd Zdd ZdS )    )absolute_importdivisionprint_functionunicode_literalsN)Compact_graph_whole)Compact_graph_partial)Compact_graph_prunerg?F   c       	      C   sF  t jdj|  | jd|}tjr<|jd t jdj| t }|j| tjr\|jd |j	|d tjr~|jdjd t
 }|j	| tjr|jd t jd	j| t }|j|| tjr|jd
 t jdj| |j|| tjr|jd t jdj| |j| tjr:|jd t jdj| t|}|S )z
    Create a new splice graph based on the node alignment obj.

    Since some nodes may show up as duplicate (repeat) nodes, assign each a unique ID
    zrefine_alignment({})z^^SGRAPH2^^zladeda.pre.dotz# pre-refinement tgraph:
{}zladeda.linear_compact.dotr   zladeda.compact.m{}.dotzladeda.partial_compaction.dotz@# post-refinement partial (suffix/prefix adjustments) tgraph:
{}zladeda.burr_removal.dotz# removing burrs tgraph:
{}zladeda.bubble_popping.dotz# bubbles popped tgraph:
{}zladeda.final.dotz# final tgraph:
{})loggerdebugformatto_splice_graphTGLOBALSDEBUG
draw_graphr   Zcompact_unbranchedZcompact_graphr   r   Zremove_burrsZpop_small_bubblessplice_graph_to_node_alignment)	Znode_alignment_objreset_node_idsmax_burr_lengthmax_bubble_pop_lengthrefined_tgraphZgraph_compactorZpartial_graph_compactorZcompact_graph_prunersplice_graph_node_alignment r   X/broad/hptmp/bhaas/trinityrnaseq/Analysis/SuperTranscripts/pylib/Splice_model_refiner.pyrefine_alignment   s@    	









r   c             C   s  t j j| j }tjdt|  t }x*tdt|D ]}|| j	 }|||< q8W t
 }t }x|D ]}|j|j }qfW t
|}xV|D ]N}dd |D }	x0|D ](}||j kr|j	 }|| }
||	|
< qW |j|	 qW tj| j ||}tjdt|  |S )NzTopologically sorted nodes: r   c             S   s   g | ]}d qS )Nr   ).0ir   r   r   
<listcomp>z   s    z2splice_graph_to_node_alignment.<locals>.<listcomp>zSplice graph node_alignment: )Topological_sorttopologically_sortget_all_nodesr
   r   strdictrangelen
get_loc_idlistsetunionget_transcriptsappendNode_alignmentget_gene_id)tgraphtopologically_sorted_nodesaligned_loc_id_posr   loc_idnew_alignmentstranscript_idsnodetranscript_idnew_alignmentnew_idxr   r   r   r   r   e   s,    


r   c             C   s.  | j  }| j }| j }t|}t }xptdt|d D ]Z}xTt|d t|D ]>}t|| || rt|j| qRt|| || rR|j| qRW q:W |rt }t }	x:tdt|D ](}||kr|j	||  |	j	||  qW t
j
|||	}
t|}tjdj|||| d  |
S tjd | S d S )Nr      z<Containments found, reporting reduced set {} of {} = {:.2f}%d   zNo containments found)r+   get_transcript_namesget_aligned_nodesr#   r&   r"   a_contains_baddr%   r)   r*   r
   r   r   )Znode_alignmentgene_idtranscript_namesaligned_nodesZ num_transcripts_before_reductionZcontainmentsr   jZadj_transcript_namesZadj_aligned_nodesZadj_splice_graph_node_alignmentZnum_after_reductionr   r   r   remove_redundant_paths   s4    
r@   c             C   sJ   d}x(t dt| D ]}| | d k	r|}P qW |dk rFtdjtt|S )Nr6   r   z7Error, didn't find first non-none value among {} and {})r"   r#   RuntimeErrorr   node_list_Anode_list_B)	node_listZ	begin_idxr   r   r   r   get_first_node_idx   s    rF   c             C   sN   d}x,t tdt| D ]}| | d k	r|}P qW |dk rJtdjtt|S )Nr6   r   z6Error, didn't find last non-none value among {} and {}rA   )reversedr"   r#   rB   r   rC   rD   )rE   Zend_idxr   r   r   r   get_end_node_idx   s    rH   c             C   sd   t | }t| }t |}t|}||ko.||ks4dS x*t||d D ]}| | || krDdS qDW dS )NFr6   T)rF   rH   r"   )rC   rD   ZA_startZA_endZB_startZB_endr   r   r   r   r:      s    r:   )Fr	   r	   )
__future__r   r   r   r   ossysreloggingTGraphTNoder*   r   r   r   r   r   	getLogger__name__r
   ZMAX_MM_RATEr   r   r@   rF   rH   r:   r   r   r   r   <module>   s&   
 
I%+