File: test_readFiltering.py

package info (click to toggle)
python-deeptools 3.5.6%2Bdfsg-1
  • links: PTS, VCS
  • area: main
  • in suites: sid, trixie
  • size: 34,456 kB
  • sloc: python: 14,503; xml: 4,212; sh: 33; makefile: 5
file content (167 lines) | stat: -rw-r--r-- 6,340 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
import deeptools.estimateReadFiltering as est
import deeptools.alignmentSieve as sieve
import os.path
from os import unlink
import hashlib
import unittest
import pysam


ROOT = os.path.dirname(os.path.abspath(__file__)) + "/test_data/"
BAMFILE_FILTER = ROOT + "test_filtering.bam"
BEDFILE_FILTER = ROOT + "test_filtering.blacklist.bed"
PAIREDBAMFILE_FILTER = ROOT + "test_paired.bam"


def test_estimate_read_filtering_minimal():
    """
    Minimal testing
    """
    outfile = '/tmp/test_minimal.txt'
    args = '-b {} -o {}'.format(BAMFILE_FILTER, outfile).split()
    est.main(args)

    _foo = open(outfile, 'r')
    resp = _foo.readlines()
    _foo.close()
    expected = ['Sample\tTotal Reads\tMapped Reads\tAlignments in blacklisted regions\tEstimated mapped reads filtered\tBelow MAPQ\tMissing Flags\tExcluded Flags\tInternally-determined Duplicates\tMarked Duplicates\tSingletons\tWrong strand\n',
                'test_filtering.bam\t193\t193\t0\t0.0\t0.0\t0.0\t0.0\t0.0\t0.0\t0.0\t0.0\n']
    # strip the path from the output
    _ = resp[1].split("\t")
    _[0] = os.path.basename(_[0])
    resp[1] = "\t".join(_)
    assert f"{resp}" == f"{expected}", f"{resp} != {expected}"
    unlink(outfile)


def test_estimate_read_filtering_params():
    """
    --minMappingQuality 10 --samFlagExclude 512 --ignoreDuplicates -bl
    """
    outfile = '/tmp/test_params.txt'
    args = '-b {} --minMappingQuality 10 --samFlagExclude 512 --ignoreDuplicates -bl {} -o {}'.format(BAMFILE_FILTER, BEDFILE_FILTER, outfile).split()
    est.main(args)

    _foo = open(outfile, 'r')
    resp = _foo.readlines()
    _foo.close()
    # strip the path from the output
    _ = resp[1].split("\t")
    _[0] = os.path.basename(_[0])
    resp[1] = "\t".join(_)
    expected = ['Sample\tTotal Reads\tMapped Reads\tAlignments in blacklisted regions\tEstimated mapped reads filtered\tBelow MAPQ\tMissing Flags\tExcluded Flags\tInternally-determined Duplicates\tMarked Duplicates\tSingletons\tWrong strand\n',
                'test_filtering.bam\t193\t193\t7\t193\t41.4\t0.0\t186.5\t31.6\t0.0\t0.0\t0.0\n']
    assert f"{resp}" == f"{expected}", f"{resp} != {expected}"
    unlink(outfile)

@unittest.skip(reason='Skip this test (see )https://github.com/deeptools/deepTools/issues/903')
def test_sieve():
    """
    Test filtering a BAM file by MAPQ, flag, and blacklist
    """
    outfile = '/tmp/test_sieve.bam'
    outfiltered = '/tmp/test_sieveFiltered.bam'
    outlog = '/tmp/test_sieve.log'
    args = '-b {} --smartLabels --minMappingQuality 10 --samFlagExclude 512 -bl {} -o {} --filterMetrics {} --filteredOutReads {}'.format(BAMFILE_FILTER, BEDFILE_FILTER, outfile, outlog, outfiltered).split()
    sieve.main(args)

    _foo = open(outlog, 'r')
    resp = _foo.readlines()
    _foo.close()

    expected = ['#bamFilterReads --filterMetrics\n',
                '#File\tReads Remaining\tTotal Initial Reads\n',
                'test_filtering\t5\t193\n']
    assert f"{resp}" == f"{expected}", f"{resp} != {expected}"
    unlink(outlog)
    h = hashlib.md5(pysam.view(outfile).encode('utf-8')).hexdigest()
    expectedh = 'acbc4443fb0387bfd6c412af9d4fc414'
    assert f'{h}' == f'{expectedh}'
    unlink(outfile)

    h1 = hashlib.md5(pysam.view(outfiltered).encode('utf-8')).hexdigest()
    expectedh = 'b90befdd5f073f14acb9a38661f301ad'
    assert f"{h1}" == f"{expectedh}"
    unlink(outfiltered)


def test_sieve_BED():
    """
    Test alignmentSieve with the --BED option
    """
    outfile = '/tmp/test_sieve.bed'
    args = '-b {} --minMappingQuality 10 --BED -o {}'.format(PAIREDBAMFILE_FILTER, outfile).split()
    sieve.main(args)

    _foo = open(outfile, 'r')
    resp = _foo.readlines()
    _foo.close()

    expected = ['chr2\t5000026\t5000390\n',
                'chr2\t5000303\t5000711\n',
                'chr2\t5000384\t5000531\n',
                'chr2\t5000384\t5000531\n',
                'chr2\t5000559\t5000941\n',
                'chr2\t5000736\t5001171\n',
                'chr2\t5000819\t5001228\n',
                'chr2\t5000821\t5001158\n',
                'chr2\t5000821\t5001158\n',
                'chr2\t5000821\t5001158\n',
                'chr2\t5000834\t5001249\n',
                'chr2\t5000855\t5001277\n',
                'chr2\t5000867\t5001218\n',
                'chr2\t5000925\t5001023\n',
                'chr2\t5000925\t5001023\n',
                'chr2\t5000937\t5001338\n',
                'chr2\t5001010\t5001176\n',
                'chr2\t5001025\t5001431\n',
                'chr2\t5001050\t5001436\n',
                'chr2\t5001114\t5001413\n',
                'chr2\t5001115\t5001269\n',
                'chr2\t5001115\t5001269\n',
                'chr2\t5001226\t5001603\n',
                'chr2\t5001491\t5001527\n',
                'chr2\t5001700\t5001736\n']

    assert f"{resp}" == f"{expected}", f"{resp} != {expected}"
    unlink(outfile)


def test_sieve_BED_shift():
    """
    Test alignmentSieve --BED --shift
    """
    outfile = '/tmp/test_sieve_shift.bed'
    args = '-b {} --minMappingQuality 10 --BED -o {} --shift 1 -2 3 -4'.format(PAIREDBAMFILE_FILTER, outfile).split()
    sieve.main(args)

    _foo = open(outfile, 'r')
    resp = _foo.readlines()
    _foo.close()

    expected = ['chr2\t5000027\t5000388\n',
                'chr2\t5000307\t5000708\n',
                'chr2\t5000388\t5000528\n',
                'chr2\t5000385\t5000529\n',
                'chr2\t5000560\t5000939\n',
                'chr2\t5000737\t5001169\n',
                'chr2\t5000823\t5001225\n',
                'chr2\t5000825\t5001155\n',
                'chr2\t5000825\t5001155\n',
                'chr2\t5000825\t5001155\n',
                'chr2\t5000835\t5001247\n',
                'chr2\t5000859\t5001274\n',
                'chr2\t5000868\t5001216\n',
                'chr2\t5000929\t5001020\n',
                'chr2\t5000929\t5001020\n',
                'chr2\t5000941\t5001335\n',
                'chr2\t5001011\t5001174\n',
                'chr2\t5001026\t5001429\n',
                'chr2\t5001054\t5001433\n',
                'chr2\t5001118\t5001410\n',
                'chr2\t5001119\t5001266\n',
                'chr2\t5001119\t5001266\n',
                'chr2\t5001230\t5001600\n']

    assert f"{resp}" == f"{expected}", f"{resp} != {expected}"
    unlink(outfile)