
 !
Y-A                 @   s*  d  Z  d d l Z d d l Z d d l Z d d l Z d d l Z d Z d Z d Z d Z	 d Z
 d d	 d
  Z d d   Z d d   Z d d d  Z d d   Z d d d d  Z d d   Z d d   Z d d   Z d d   Z Gd d    d  e j  Z Gd! d"   d" e j  Z Gd# d$   d$ e j  Z Gd% d&   d& e j  Z Gd' d(   d( e j  Z Gd) d*   d* e j  Z Gd+ d,   d, e j  Z Gd- d.   d. e j  Z Gd/ d0   d0 e j  Z Gd1 d2   d2 e j  Z Gd3 d4   d4 e j  Z  Gd5 d6   d6 e j  Z! e" d7 k r&e j#   d S)8a%  
test.py
Used to run tests on the test files found in /samples/
From root, execute using `python test/test.py`
First, ensure you have fully installed the pypairix package:
`pip install pypairix --user`
OR
`sudo python setup.py install`

If you're having trouble running this file, try installing
python-dev and zlib1g-dev.

Note: tests are run to anticipate either juicer-formatted pairs files or 4DN-
formatted pairs files.
The columns (given in form <attribute [col#]):
Juicer: chr1[1] pos1[2] chr2[5] pos2[6]
4DN: chr1[1] pos1[2] chr2[3] pos2[4]
    Nz/samples/merged_nodup.tab.chrblock_sorted.txt.gzz*samples/4dn.bsorted.chr21_22_only.pairs.gzzsamples/test_4dn.pairs.gzz-samples/SRR1171591.variants.snp.vqsr.p.vcf.gzz=samples/merged_nodups.space.chrblock_sorted.subsample1.txt.gz#c             C   sn   g  } xa t  j |   D]P } y | j d  } Wn t k
 rC Yn X| j |  r | j | j    q W| S)z8Read gzipped file and retrieve lines beginning with '#'.zutf-8)gzipopendecodeAttributeError
startswithappendrstrip)filenameZ	meta_charretvalline r   "/Users/soo/git/pairix/test/test.py
get_header#   s    r   c             C   s   g  } x t  j |   D]y } y | j d  } Wn t k
 rC Yn X| j d  r | j   j d  } | d } | d } | j | | g  q W| S)z)Read gzipped file and retrieve chromsize.zutf-8z#chromsize: z\s+      )r   r   r   r   r   r	   splitr   )r
   r   r   fieldsZchrnameZchrsizer   r   r   get_chromsize0   s    

r   c             C   s   g  } x t  j |   D]w } y | j d  } Wn t k
 rC Yn X| j   j d  } | d } | d } | d } | j | | | g  q W| S)z?Read a VCF file and return a list of [chrom, start, end] items.zutf-8	r   r   )r   r   r   r   r	   r   r   )r
   r   r   r   chromstartendr   r   r   read_vcf@   s    


r   r   c             C   s   d } d } x t  j |   D] } y | j d  } Wn t k
 rI Yn X| j   j |  } t | d  r t | d  r d } t | d  r t | d  r d } | r | r d S| r d Sq Wd	 S)
z{Attempt to determine if input pairs file is of type: juicer, 4DN,
    or undetermined. Do this by testing string values of Fzutf-8r      T   4DNjuicerundetermined)r   r   r   r   r	   r   is_str)r
   	delimiterZ	is_juicerZis_4DNr   r   r   r   r   find_pairs_typeP   s"      r!   c             C   s/   y t  |   d SWn t k
 r* d SYn Xd S)z?Helper function to see if a string is an int. Return True if soTFN)int
ValueError)sr   r   r   r   f   s
    
r   r   c       
      C   s   | d k r g  Sg  } x t  j |   D] } y | j d  } Wn t k
 rS Yn X| j   j |  } | d k r | d } | d } | d } | d }	 n4 | d k r | d } | d } | d	 } | d
 }	 | j | | | | |	 |	 g  q& W| S)zZRead a pairs file and return a list of [chrom1, start1, end1, chrom2, start2, end2] items.r   zutf-8r   r   r      r   r      r   )r   r   r   r   r	   r   r   )
r
   Z	file_typer    r   r   r   Zchrom1Zstart1chrom2start2r   r   r   
read_pairso   s*    






#r)   c             C   s.   t  |   t  |  k o- t  |  t  |  k S)N)r"   )Za0Za1Zb0Zb1r   r   r   overlap1   s    r*   c             C   sU   g  } xH |  D]@ } | d | k r t  | d | d | |  r | j |  q W| S)Nr   r   r   )r*   r   )regionsr   r   r   r   rr   r   r   
get_result   s
    -r-   c       	      C   s   g  } xu |  D]m } | d | k r t  | d | d | |  r | d | k r t  | d | d | |  r | j |  q W| S)Nr   r   r   r&   r   r%   )r*   r   )	r+   r   r   r   r'   r(   end2r   r,   r   r   r   get_result_2D   s
    Zr/   c             C   sZ   | d k r" d d   |  D } n4 | d k rD d d   |  D } n | d k rV g  } | S)z=Build results using the pairix iterator based on the filetyper   c          	   S   s@   g  |  ]6 } | d  | d | d | d | d | d g  q S)r   r   r%   r   r   ).0xr   r   r   
<listcomp>   s   	 z#build_it_result.<locals>.<listcomp>r   c          	   S   s@   g  |  ]6 } | d  | d | d | d | d | d g  q S)r   r   r&   r   r   )r0   r1   r   r   r   r2      s   	 r   r   )itf_type	pr_resultr   r   r   build_it_result   s    r6   c               @   sv   e  Z d  Z e e  Z d Z d Z d Z e	 e e e e  Z
 e j e  Z d d   Z d d   Z d d	   Z d
 S)
PairixTestZchr10iXe  ic             C   sK   |  j  j |  j |  j |  j  } d d   | D } |  j |  j |  d  S)Nc             S   s+   g  |  ]! } | d  | d | d g  q S)r   r   r   )r0   r1   r   r   r   r2      s   	 z)PairixTest.test_query.<locals>.<listcomp>)prqueryr   r   r   assertEqualresult)selfr3   r5   r   r   r   
test_query   s    !zPairixTest.test_queryc             C   sZ   d j  |  j |  j |  j  } |  j j |  } d d   | D } |  j |  j |  d  S)Nz{}:{}-{}c             S   s+   g  |  ]! } | d  | d | d g  q S)r   r   r   )r0   r1   r   r   r   r2      s   	 z*PairixTest.test_querys.<locals>.<listcomp>)formatr   r   r   r8   querysr:   r;   )r<   r9   r3   r5   r   r   r   test_querys   s    zPairixTest.test_querysc             C   sy   t  j t d d t  j t  } d j |  j |  j |  j  } | j |  } d d   | D } |  j	 |  j
 |  d  S)Nforcer   z{}:{}-{}c             S   s+   g  |  ]! } | d  | d | d g  q S)r   r   r   )r0   r1   r   r   r   r2      s   	 z>PairixTest.test_build_index_with_force_vcf.<locals>.<listcomp>)pypairixbuild_indexTEST_FILE_1Dr   r>   r   r   r   r?   r:   r;   )r<   pr2r9   it2
pr2_resultr   r   r   test_build_index_with_force_vcf   s    z*PairixTest.test_build_index_with_force_vcfN)__name__
__module____qualname__r   rD   r+   r   r   r   r-   r;   rB   r   r8   r=   r@   rH   r   r   r   r   r7      s   r7   c               @   s   e  Z d  Z e e  Z e e e  Z d Z d Z	 d Z
 d Z e e e e	 e
 e d e j  Z e j e  Z d d   Z d S)	PairixTest_210iXe  i20r   c             C   s_   d j  |  j |  j |  j |  j  } |  j j |  } t | |  j  } |  j	 |  j
 |  d  S)Nz{}:{}-{}|{})r>   r   r   r   r'   r8   querys2Dr6   r4   r:   r;   )r<   r9   r3   r5   r   r   r   r@      s    $zPairixTest_2.test_querysN)rI   rJ   rK   r!   TEST_FILE_2Dr4   r)   r+   r   r   r   r'   r/   sysmaxsizer;   rB   r   r8   r@   r   r   r   r   rL      s   !rL   c               @   s   e  Z d  Z e e  Z e e e  Z d Z d Z	 d Z
 d Z d Z d Z e e e e	 e
 e e e  Z e j e  Z d d   Z d	 d
   Z d d   Z d d   Z d S)PairixTest2DrM   r   i@B rN   ii c             C   s\   |  j  j |  j |  j |  j |  j |  j |  j  } t | |  j	  } |  j
 |  j |  d  S)N)r8   query2Dr   r   r   r'   r(   r.   r6   r4   r:   r;   )r<   r3   r5   r   r   r   test_query2   s    3zPairixTest2D.test_query2c             C   sk   d j  |  j |  j |  j |  j |  j |  j  } |  j j |  } t	 | |  j
  } |  j |  j |  d  S)Nz{}:{}-{}|{}:{}-{})r>   r   r   r   r'   r(   r.   r8   rO   r6   r4   r:   r;   )r<   r9   r3   r5   r   r   r   test_querys_2   s    0zPairixTest2D.test_querys_2c          
   C   s   d j  |  j |  j |  j |  j |  j |  j  } t j d d  ] } t j	 d  |  j
 j |  } t |  d k s| t  t | d j t j  s t  Wd  QRXd  S)Nz{}:{}-{}|{}:{}-{}recordTalwaysr   )r>   r   r   r   r'   r(   r.   warningscatch_warningssimplefilterr8   rO   lenAssertionError
issubclasscategoryrB   PairixWarning)r<   r9   wr3   r   r   r   test_querys_2_bad_order   s    0z$PairixTest2D.test_querys_2_bad_orderc             C   s   t  j t d d d d d d d d d d	 d
 d	 d d t  j t  } d j |  j |  j |  j |  j |  j	 |  j
  } | j |  } t | |  j  } |  j |  j |  d  S)NZscr   Zbcr&   ZecZsc2r   Zbc2   Zec2rA   r   z{}:{}-{}|{}:{}-{})rB   rC   rP   r   r>   r   r   r   r'   r(   r.   rO   r6   r4   r:   r;   )r<   rE   r9   rF   rG   r   r   r   -test_build_index_with_force_merged_nodups_tab   s    70z:PairixTest2D.test_build_index_with_force_merged_nodups_tabN)rI   rJ   rK   r!   rP   r4   r)   r+   r   r   r   r'   r(   r.   r/   r;   rB   r   r8   rU   rV   rc   re   r   r   r   r   rS      s   rS   c               @   s   e  Z d  Z e e  Z e e e  Z d Z d Z	 d Z
 d Z d Z d Z e e e e	 e
 e e e  Z e j e  Z d d   Z d	 d
   Z d d   Z d S)PairixTest2D_reverserM   r   i@B rN   ii c             C   s_   |  j  j |  j |  j |  j |  j |  j |  j d  } t | |  j	  } |  j
 |  j |  d  S)Nr   )r8   rT   r   r   r   r'   r(   r.   r6   r4   r:   r;   )r<   r3   r5   r   r   r   test_query2_rev  s    6z$PairixTest2D_reverse.test_query2_revc             C   sn   d j  |  j |  j |  j |  j |  j |  j  } |  j j | d  } t	 | |  j
  } |  j |  j |  d  S)Nz{}:{}-{}|{}:{}-{}r   )r>   r   r   r   r'   r(   r.   r8   rO   r6   r4   r:   r;   )r<   r9   r3   r5   r   r   r   test_querys_2_rev  s    0z&PairixTest2D_reverse.test_querys_2_revc             C   s   t  j d d  ~ } t  j d  |  j j |  j |  j |  j |  j |  j	 |  j
  } t |  d k sm t  t | d j t j  s t  Wd  QRXd  S)NrW   TrX   r   rY   )rZ   r[   r\   r8   rT   r   r   r   r'   r(   r.   r]   r^   r_   r`   rB   ra   )r<   rb   r3   r   r   r   test_query2_rev_fail  s
    3z)PairixTest2D_reverse.test_query2_rev_failN)rI   rJ   rK   r!   rP   r4   r)   r+   r'   r(   r.   r   r   r   r/   r;   rB   r   r8   rg   rh   ri   r   r   r   r   rf     s   rf   c               @   s   e  Z d  Z e e  Z e e e  Z d Z d Z	 d Z
 d Z d Z d Z e e e e	 e
 e e e  Z e j e  Z d d   Z d d	   Z d
 d   Z d d   Z d S)PairixTest2D_4DNchr21r   iggchr22ivc             C   s\   |  j  j |  j |  j |  j |  j |  j |  j  } t | |  j	  } |  j
 |  j |  d  S)N)r8   rT   r   r   r   r'   r(   r.   r6   r4   r:   r;   )r<   r3   r5   r   r   r   test_query2_4dn8  s    3z PairixTest2D_4DN.test_query2_4dnc             C   sk   d j  |  j |  j |  j |  j |  j |  j  } |  j j |  } t	 | |  j
  } |  j |  j |  d  S)Nz{}:{}-{}|{}:{}-{})r>   r   r   r   r'   r(   r.   r8   rO   r6   r4   r:   r;   )r<   r9   r3   r5   r   r   r   test_querys_2_4dn=  s    0z"PairixTest2D_4DN.test_querys_2_4dnc          
   C   sn   |  j  t j   } t j t  Wd  QRXt j d k rT |  j | j j	   d  n |  j | j j
 d  d  S)Nr&   r   z7The index file exists. Please use force=1 to overwrite.)r&   r   )ZassertRaisesrB   ZPairixErrorrC   TEST_FILE_2D_4DNrQ   version_infor:   Z	exception__str__message)r<   errorr   r   r   test_build_index_without_forceC  s
    z/PairixTest2D_4DN.test_build_index_without_forcec             C   s   t  j t d d t  j t  } d j |  j |  j |  j |  j |  j	 |  j
  } | j |  } t | |  j  } |  j |  j |  d  S)NrA   r   z{}:{}-{}|{}:{}-{})rB   rC   ro   r   r>   r   r   r   r'   r(   r.   rO   r6   r4   r:   r;   )r<   rE   r9   rF   rG   r   r   r   test_build_index_with_forceM  s    0z,PairixTest2D_4DN.test_build_index_with_forceN)rI   rJ   rK   r!   ro   r4   r)   r+   r   r   r   r'   r(   r.   r/   r;   rB   r   r8   rm   rn   rt   ru   r   r   r   r   rj   +  s   
rj   c               @   s   e  Z d  Z e e d  Z e e e d  Z d Z d Z	 d Z
 d Z d Z d Z e e e e	 e
 e e e  Z e j e  Z d d	   Z d
 d   Z d d   Z d S)PairixTest2DSpace rM   r   i@B rN   ii c             C   s\   |  j  j |  j |  j |  j |  j |  j |  j  } t | |  j	  } |  j
 |  j |  d  S)N)r8   rT   r   r   r   r'   r(   r.   r6   r4   r:   r;   )r<   r3   r5   r   r   r   rU   c  s    3zPairixTest2DSpace.test_query2c             C   sk   d j  |  j |  j |  j |  j |  j |  j  } |  j j |  } t	 | |  j
  } |  j |  j |  d  S)Nz{}:{}-{}|{}:{}-{})r>   r   r   r   r'   r(   r.   r8   rO   r6   r4   r:   r;   )r<   r9   r3   r5   r   r   r   rV   h  s    0zPairixTest2DSpace.test_querys_2c             C   s   t  j t d d d t  j t  } d j |  j |  j |  j |  j |  j	 |  j
  } | j |  } t | |  j  } |  j |  j |  d  S)NZmerged_nodupsrA   r   z{}:{}-{}|{}:{}-{})rB   rC   TEST_FILE_2D_SPACEr   r>   r   r   r   r'   r(   r.   rO   r6   r4   r:   r;   )r<   rE   r9   rF   rG   r   r   r   )test_build_index_with_force_merged_nodupsn  s    0z;PairixTest2DSpace.test_build_index_with_force_merged_nodupsN)rI   rJ   rK   r!   rx   r4   r)   r+   r   r   r   r'   r(   r.   r/   r;   rB   r   r8   rU   rV   ry   r   r   r   r   rv   W  s   rv   c               @   s   e  Z d  Z d d   Z d S)PairixTestBlocknamesc             C   s   t  j t  } | j   } | j   g  } t t  } t t |  } x+ | D]# } | j | d d | d  qM Wt t	 |   } | j   |  j
 | |  d  S)Nr   |r&   )rB   r   rP   Zget_blocknamessortr!   r)   r   listsetr:   )r<   r8   Zretrieved_blocklistZ	blocklistr4   r+   aZblocklist_uniqr   r   r   test_blocknamesx  s    
!
z$PairixTestBlocknames.test_blocknamesN)rI   rJ   rK   r   r   r   r   r   rz   v  s   rz   c               @   s   e  Z d  Z d d   Z d S)PairixTestGetColumnIndexc             C   s*  t  j t  } t  j t  } |  j | j   d  |  j | j   d  |  j | j   d  |  j | j   d  |  j | j	   d  |  j | j
   d  |  j | j   d  |  j | j   d  |  j | j   d  |  j | j   d  |  j | j	   d  |  j | j
   d  d  S)Nr   r%   r   r   r&   r   )rB   r   rP   ro   r:   Zget_chr1_colZget_chr2_colZget_startpos1_colZget_startpos2_colZget_endpos1_colZget_endpos2_col)r<   r8   rE   r   r   r   test_columnindex  s    z)PairixTestGetColumnIndex.test_columnindexN)rI   rJ   rK   r   r   r   r   r   r     s   r   c               @   s   e  Z d  Z d d   Z d S)PairixTestExistsc             C   s   t  j t  } |  j | j d  d  |  j | j d  d  |  j | j d  d  |  j | j d  d  |  j | j d  d  |  j | j d  d  |  j | j d	  d  d  S)
Nzchr21|chr21r   zchr21|chr22zchr22|chr22zchr22|chr21r   z	chr1|chr2rk   z1|2)rB   r   ro   r:   exists)r<   r8   r   r   r   test_exists  s    zPairixTestExists.test_existsN)rI   rJ   rK   r   r   r   r   r   r     s   r   c               @   s   e  Z d  Z d d   Z d S)PairixTestExists2c             C   s   t  j t  } |  j | j d d  d  |  j | j d d  d  |  j | j d d  d  |  j | j d d  d  |  j | j d d  d  |  j | j d d  d  d  S)	Nrk   r   rl   r   Zchr1Zchr212)rB   r   ro   r:   Zexists2)r<   r8   r   r   r   test_exists2  s    zPairixTestExists2.test_exists2N)rI   rJ   rK   r   r   r   r   r   r     s   r   c               @   s   e  Z d  Z d d   Z d S)PairixTestGetHeaderc             C   sZ   t  j t  } |  j | j   t t   t  j t  } |  j | j   t t   d  S)N)rB   r   ro   r:   r   TEST_FILE_2D_4DN_2)r<   r8   r   r   r   tet_get_header  s    z"PairixTestGetHeader.tet_get_headerN)rI   rJ   rK   r   r   r   r   r   r     s   r   c               @   s   e  Z d  Z d d   Z d S)PairixTestGetChromsizec             C   sZ   t  j t  } |  j | j   t t   t  j t  } |  j | j   t t   d  S)N)rB   r   ro   r:   r   r   )r<   r8   r   r   r   r     s    z%PairixTestGetChromsize.tet_get_headerN)rI   rJ   rK   r   r   r   r   r   r     s   r   __main__)$__doc__Zunittestr   rQ   rB   rZ   rP   ro   r   rD   rx   r   r   r   r!   r   r)   r*   r-   r/   r6   ZTestCaser7   rL   rS   rf   rj   rv   rz   r   r   r   r   r   rI   mainr   r   r   r   <module>   sD   	-&,		