File: mthread_test.py

package info (click to toggle)
smalt 0.7.6-13
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 72,824 kB
  • sloc: ansic: 26,340; sh: 3,826; python: 1,472; makefile: 323
file content (101 lines) | stat: -rw-r--r-- 3,248 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
# test of multi-threaded execution

PROGNAM = "../src/smalt"

#DATA = ("contigs.fa.gz", "contigs_i3k_1.fq.gz", "contigs_i3k_2.fq.gz")
DATA = ("genome_1.fa.gz", "gen1l75i300e0_1.fq.gz", "gen1l75i300e0_2.fq.gz", int(10000))

KMER = 11
NSKIP = 11
NTHREADS = 2

TMPFIL_PREFIX = "TMPmthread"
MAPQ_THRESH = 6
VERBOSE = False

def smalt_index(df, index_name, fasta_name, kmer, nskip):
    from sys import exit
    from subprocess import call

    tup = (PROGNAM, 'index',
           '-k', '%i' % (int(kmer)),
           '-s', '%i' % (int(nskip)),
           index_name,
           fasta_name)
    df.call(tup, "when indexing")

def smalt_map(df, oufilnam, indexnam, readfil,
              matefil="", added_options=[], ouform="cigar",):
    from sys import exit
    from subprocess import call
    
    tup = [PROGNAM, 'map'];
    tup.extend(added_options);
    tup.extend(['-f', ouform,
                '-o', oufilnam,
                indexnam,
                readfil, matefil])

    df.call(tup, "when mapping")

def cmpCigarFiles(cigfilA, cigfilB, is_verbose=True):
    from formats import Cigar, openFile, getNextCigarPair
    cigA1 = Cigar()
    cigA2 = Cigar()
    cigB1 = Cigar()
    cigB2 = Cigar()
    
    filA = openFile(cigfilA, 'r')
    filB = openFile(cigfilB, 'r')
    ctr = 0
    while 1:
        (isOK, isEOF) = getNextCigarPair(filA, cigA1, cigA2)
        if not isOK:
            break
        (isOK, isEOF) = getNextCigarPair(filB, cigB1, cigB2)
        if not isOK:
            break
        if cigA1 != cigB1:
            if is_verbose:
                print("Not matching:\n%s\n%s" % (cigA1.lin, cigB1.lin))
            if cigA1.mapq > MAPQ_THRESH and cigB1.mapq > MAPQ_THRESH:
                exit("Discrepancy:\n%s\n%s\ncigA1.mapq=%i > MAPQ_THRESH=%i and cigB1.mapq=%i > MAPQ_THRESH=%i" % (cigA1.lin, cigB1.lin, cigA1.mapq, MAPQ_THRESH, cigB1.mapq, MAPQ_THRESH))
        if cigA2 != cigB2:
            if is_verbose:
                print("Not matching:\n%s\n%s" % (cigA2.lin, cigB2.lin)) 
            if cigA2.mapq > MAPQ_THRESH and cigB2.mapq > MAPQ_THRESH:
                exit("Discrepancy:\n%s\n%s" % (cigA2.lin, cigB2.lin))
        ctr = ctr + 1
    if not isOK and isEOF:
        isOK = True
    return isOK, ctr
            
        
        
if __name__ == '__main__':
    from testdata import DataFiles, areFilesIdentical

    df = DataFiles()
    refnam = df.joinData(DATA[0])
    readfilnamA = df.joinData(DATA[1])
    readfilnamB = df.joinData(DATA[2])
    n_pairs_expected = DATA[3]
    
    indexnam = df.addIndex(TMPFIL_PREFIX)
    oufilnam_ref = df.addTMP(TMPFIL_PREFIX + ".n0.cig")
    oufilnam_thread = df.addTMP(TMPFIL_PREFIX + ".n%i.cig" % NTHREADS)
    
    smalt_index(df, indexnam, refnam, KMER, NSKIP);

    smalt_map(df, oufilnam_ref, indexnam, readfilnamA, readfilnamB);
    smalt_map(df, oufilnam_thread, indexnam, readfilnamA, readfilnamB,
              ["-n", "%i" % NTHREADS, "-O"]);

    isOK, pairctr = cmpCigarFiles(oufilnam_ref, oufilnam_thread, VERBOSE)
    if VERBOSE:
        print("Test ok=%s, number of pairs = %i" % (isOK, pairctr))
    if not isOK or pairctr != n_pairs_expected:
        exit("Using smalt in multi-threaded mode gave inconsistent results!")

    df.cleanup()
    exit()