File: test_all_sam_dump_has_spotgroup.py

package info (click to toggle)
sra-sdk 2.10.9%2Bdfsg-2
  • links: PTS, VCS
  • area: main
  • in suites: bullseye
  • size: 38,576 kB
  • sloc: ansic: 211,129; cpp: 54,855; perl: 7,788; sh: 6,988; makefile: 5,174; python: 3,840; java: 2,363; yacc: 786; lex: 416; ruby: 329; lisp: 268; xml: 55
file content (99 lines) | stat: -rwxr-xr-x 2,747 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
#!/usr/bin/python3

import sys, getopt, subprocess, multiprocessing


def run( cmd, lines, q_out ) :
    """Run *cmd* and forward its non-header output lines to *q_out*.

    The child's stdout is read line by line and decoded as ASCII. Lines that
    start with '@' are counted but not forwarded; every other non-blank line
    is put on *q_out* with surrounding whitespace stripped. Blank lines are
    skipped and do not end the stream. After the output is exhausted (or the
    limit is hit) a single None is put on *q_out* as the end-of-stream marker.

    cmd   ... argument list handed to subprocess.Popen
    lines ... None for no limit; otherwise the child is killed as soon as
              more than *lines* non-blank lines (headers included) were read
    q_out ... any object with a put() method (e.g. a multiprocessing.Queue)
    """
    # the context manager closes the pipe and waits for the child on exit,
    # so neither a file descriptor nor a zombie process is left behind
    with subprocess.Popen( cmd, stdout = subprocess.PIPE ) as p :
        n = 0
        for raw in p.stdout :
            line = raw.decode( 'ascii' ).strip()
            if line == '' :
                # an empty line is not end-of-output: real EOF ends the for-loop
                continue
            n += 1
            if not line.startswith( '@' ) :
                q_out.put( line )
            if lines is not None and n > lines :
                p.kill()
                break
    q_out.put( None )

def sam_dump_full( sam_dump, acc, spots, q_out ) :
    """Invoke the sam-dump binary on one accession and stream its output to *q_out*.

    sam_dump ... path or name of the sam-dump executable
    acc      ... accession passed as the last command line argument
    spots    ... forwarded unchanged to run() as its line limit (None = no limit)
    q_out    ... queue that receives the output lines, see run()

    The binary is called with the flags '-u' and '-g'
    (presumably "include unaligned reads" and "append spot-group to QNAME";
    confirm against the sam-dump help text).
    """
    command = [ sam_dump, '-u', '-g', acc ]
    print( "starting: sam-dump" )
    run( command, spots, q_out )
    print( "sam-dump done" )


def count_lines( q_in, q_out ) :
    """Count the lines arriving on *q_in* and how many carry a dotted QNAME.

    Lines are taken from *q_in* until a None marker shows up. For each line
    the first tab-separated field (the QNAME) is inspected; it is counted as
    "with spotgroup" if it contains at least one '.'.
    The tuple ( total, with_spotgrp ) is put on *q_out* exactly once.
    """
    seen = 0
    tagged = 0
    # iter() with a sentinel keeps calling q_in.get() until it returns None
    for record in iter( q_in.get, None ) :
        seen += 1
        qname = record.partition( '\t' )[ 0 ]
        if '.' in qname :
            tagged += 1
    q_out.put( ( seen, tagged ) )


if __name__ == '__main__':
    # Script entry point:
    #   1. parse the command line ( accession, optional line limit, sam-dump binary )
    #   2. start one process that runs sam-dump and one that counts its lines
    #   3. exit with code 3 unless every counted line has a '.' in its QNAME
    print( "running: ", __file__ )

    if sys.version_info[ 0 ] < 3 :
        print( "does not work with python version < 3!" )
        sys.exit( 3 )

    acc = 'SRR3332402'
    spots = None
    sam_dump = 'sam-dump'
    usage = ' -a <accession> -s <spots> -m <sam-dump-binary>'

    short_opts = "ha:s:m:"
    long_opts = [ "acc=", "spots=", "sam_dump=" ]
    try :
        opts, _ = getopt.getopt( sys.argv[ 1: ], short_opts, long_opts )
    except getopt.GetoptError :
        print( sys.argv[ 0 ], usage )
        sys.exit( 2 )
    for opt, arg in opts :
        if opt == '-h' :
            print( sys.argv[ 0 ], usage )
            sys.exit()
        elif opt in ( "-a", "--acc" ) :
            acc = arg
        elif opt in ( "-s", "--spots" ) :
            try :
                spots = int( arg )
            except ValueError :
                # a non-numeric limit is a usage error, not a crash
                print( sys.argv[ 0 ], usage )
                sys.exit( 2 )
        elif opt in ( "-m", "--sam_dump" ) :
            sam_dump = arg

    print( "accession = ", acc )
    if spots is not None :
        print( "spots = ", spots )

    # q1 carries the sam-lines from the producer to the counter,
    # q2 carries the final ( total, with_spotgrp ) tuple back to this process
    q1 = multiprocessing.Queue()
    q2 = multiprocessing.Queue()

    p1 = multiprocessing.Process( target = sam_dump_full, args = ( sam_dump, acc, spots, q1 ), )
    p2 = multiprocessing.Process( target = count_lines, args = ( q1, q2 ), )

    p1.start()
    p2.start()

    p1.join()
    if p1.exitcode != 0 :
        # the producer failed ( e.g. the sam-dump binary could not be started ),
        # so the end-of-stream marker may never arrive on q1 and the counter
        # could block forever: stop it instead of waiting for it
        print( "sam-dump process failed, exit code = ", p1.exitcode )
        p2.terminate()
        p2.join()
        sys.exit( 3 )

    # fetch the result before joining: a process that has put data on a
    # queue should have that data consumed before it is joined
    res = q2.get()
    p2.join()

    print( "total        : ", res[ 0 ] )
    print( "with_spotgrp : ", res[ 1 ] )
    if res[ 0 ] != res[ 1 ] :
        print ( "not all sam-lines have a spotgroup in the QNAME-field!" )
        sys.exit( 3 )
    print( "success: ", __file__, "\n" )