File: AlignmentFilePileup_bench.py

package info (click to toggle)
python-pysam 0.15.4+ds-3
  • links: PTS, VCS
  • area: main
  • in suites: bullseye, sid
  • size: 27,992 kB
  • sloc: ansic: 140,738; python: 7,881; sh: 265; makefile: 223; perl: 41
file content (147 lines) | stat: -rw-r--r-- 5,841 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
"""Benchmarking module for AlignmentFile functionality"""
import os

from TestUtils import BAM_DATADIR, force_str, flatten_nested_list
from PileupTestUtils import *


def test_build_pileup_from_bam_with_samtoolsshell(benchmark):
    """Benchmark ``build_pileup_with_samtoolsshell`` on ``ex2.bam``.

    The value handed back through ``benchmark`` must equal 2998.
    """
    bam_path = os.path.join(BAM_DATADIR, "ex2.bam")
    outcome = benchmark(build_pileup_with_samtoolsshell, bam_path)
    assert outcome == 2998


def test_build_pileup_from_bam_with_samtoolspipe(benchmark):
    """Benchmark ``build_pileup_with_samtoolspipe`` on ``ex2.bam``.

    The value handed back through ``benchmark`` must equal 2998.
    """
    bam_path = os.path.join(BAM_DATADIR, "ex2.bam")
    outcome = benchmark(build_pileup_with_samtoolspipe, bam_path)
    assert outcome == 2998


def test_build_pileup_from_bam_with_pysam(benchmark):
    """Benchmark ``build_pileup_with_pysam`` on ``ex2.bam``.

    The value handed back through ``benchmark`` must equal 2998.
    """
    bam_path = os.path.join(BAM_DATADIR, "ex2.bam")
    outcome = benchmark(build_pileup_with_pysam, bam_path)
    assert outcome == 2998


def test_build_depth_from_bam_with_samtoolsshell(benchmark):
    """Benchmark ``build_depth_with_samtoolsshell`` on ``ex2.bam``.

    The value handed back through ``benchmark`` is compared directly
    against 107241.
    """
    bam_path = os.path.join(BAM_DATADIR, "ex2.bam")
    outcome = benchmark(build_depth_with_samtoolsshell, bam_path)
    assert outcome == 107241


def test_build_depth_from_bam_with_samtoolspipe(benchmark):
    """Benchmark ``build_depth_with_samtoolspipe`` on ``ex2.bam``.

    The returned values are summed and the total must equal 107241.
    """
    bam_path = os.path.join(BAM_DATADIR, "ex2.bam")
    depths = benchmark(build_depth_with_samtoolspipe, bam_path)
    total_depth = sum(depths)
    assert total_depth == 107241


def test_build_depth_from_bam_with_pysam(benchmark):
    """Benchmark ``build_depth_with_pysam`` on ``ex2.bam``.

    The returned values are summed and the total must equal 110015.
    """
    bam_path = os.path.join(BAM_DATADIR, "ex2.bam")
    depths = benchmark(build_depth_with_pysam, bam_path)
    total_depth = sum(depths)
    # The total differs from the samtools-based depth tests because
    # samtools filters with a minimum base quality of 13.
    assert total_depth == 110015


def test_build_depth_with_filter_from_bam_with_pysam(benchmark):
    """Benchmark ``build_depth_with_filter_with_pysam`` on ``ex2.bam``.

    The returned values are summed and the total must equal 107241.
    """
    bam_path = os.path.join(BAM_DATADIR, "ex2.bam")
    depths = benchmark(build_depth_with_filter_with_pysam, bam_path)
    total_depth = sum(depths)
    assert total_depth == 107241


def test_build_query_bases_from_bam_with_samtoolsshell(benchmark):
    """Benchmark ``build_query_bases_with_samtoolsshell`` on ``ex2.bam``.

    The value handed back through ``benchmark`` is compared directly
    against 116308.
    """
    bam_path = os.path.join(BAM_DATADIR, "ex2.bam")
    outcome = benchmark(build_query_bases_with_samtoolsshell, bam_path)
    assert outcome == 116308


def test_build_query_bases_from_bam_with_samtoolspysam(benchmark):
    """Benchmark ``build_query_bases_with_samtoolspysam`` on ``ex2.bam``.

    The result is flattened with ``flatten_nested_list`` and joined into
    one string, whose length must equal 116308.
    """
    bam_path = os.path.join(BAM_DATADIR, "ex2.bam")
    columns = benchmark(build_query_bases_with_samtoolspysam, bam_path)
    joined_bases = "".join(flatten_nested_list(columns))
    assert len(joined_bases) == 116308
    

def test_build_query_bases_from_bam_with_samtoolspipe(benchmark):
    """Benchmark ``build_query_bases_with_samtoolspipe`` on ``ex2.bam``.

    The result is flattened with ``flatten_nested_list`` and joined into
    one string, whose length must equal 116308.
    """
    bam_path = os.path.join(BAM_DATADIR, "ex2.bam")
    columns = benchmark(build_query_bases_with_samtoolspipe, bam_path)
    joined_bases = "".join(flatten_nested_list(columns))
    assert len(joined_bases) == 116308


def test_build_query_bases_from_bam_with_pysam_pileups(benchmark):
    """Benchmark ``build_query_bases_with_pysam_pileups`` on ``ex2.bam``.

    The result is flattened with ``flatten_nested_list`` and joined into
    one string, whose length must equal 107241.
    """
    bam_path = os.path.join(BAM_DATADIR, "ex2.bam")
    # Note: there is no overlap detection in this variant.
    columns = benchmark(build_query_bases_with_pysam_pileups, bam_path)
    joined_bases = "".join(flatten_nested_list(columns))
    assert len(joined_bases) == 107241


def test_build_query_bases_from_bam_with_pysam(benchmark):
    """Benchmark ``build_query_bases_with_pysam`` on ``ex2.bam``.

    The result is flattened with ``flatten_nested_list`` and joined into
    one string, whose length must equal 116308.
    """
    bam_path = os.path.join(BAM_DATADIR, "ex2.bam")
    columns = benchmark(build_query_bases_with_pysam, bam_path)
    joined_bases = "".join(flatten_nested_list(columns))
    assert len(joined_bases) == 116308


# note that pileups with/without reference sequence will differ due to
# realignment.
def test_build_query_bases_with_reference_from_bam_with_samtoolspipe(benchmark):
    """Benchmark ``build_query_bases_with_samtoolspipe`` with a reference.

    The helper receives ``ex2.bam`` followed by the extra positional
    arguments ``"-f"`` and the path to ``ex1.fa``. The flattened, joined
    result must have length 115924.
    """
    bam_path = os.path.join(BAM_DATADIR, "ex2.bam")
    reference_path = os.path.join(BAM_DATADIR, "ex1.fa")
    columns = benchmark(
        build_query_bases_with_samtoolspipe, bam_path, "-f", reference_path)
    joined_bases = "".join(flatten_nested_list(columns))
    assert len(joined_bases) == 115924
    

def test_build_query_bases_with_reference_from_bam_with_pysam(benchmark):
    """Benchmark ``build_query_bases_with_pysam`` with a reference.

    ``ex1.fa`` is opened as a ``pysam.FastaFile`` for the duration of
    the benchmark run and passed to the helper as ``fastafile``. The
    flattened, joined result must have length 115924.
    """
    reference_path = os.path.join(BAM_DATADIR, "ex1.fa")
    with pysam.FastaFile(reference_path) as reference:
        bam_path = os.path.join(BAM_DATADIR, "ex2.bam")
        columns = benchmark(
            build_query_bases_with_pysam, bam_path, fastafile=reference)
    # The length check runs after the FASTA handle has been closed.
    joined_bases = "".join(flatten_nested_list(columns))
    assert len(joined_bases) == 115924
    

def test_build_query_bases_with_reference_from_bam_with_samtoolspysam(benchmark):
    """Benchmark ``build_query_bases_with_samtoolspysam`` with a reference.

    The helper receives ``ex2.bam`` followed by the extra positional
    arguments ``"-f"`` and the path to ``ex1.fa``. The flattened, joined
    result must have length 115924.
    """
    bam_path = os.path.join(BAM_DATADIR, "ex2.bam")
    reference_path = os.path.join(BAM_DATADIR, "ex1.fa")
    columns = benchmark(
        build_query_bases_with_samtoolspysam, bam_path, "-f", reference_path)
    joined_bases = "".join(flatten_nested_list(columns))
    assert len(joined_bases) == 115924


def test_build_query_qualities_from_bam_with_samtoolspipe(benchmark):
    """Benchmark ``build_query_qualities_with_samtoolspipe`` on ``ex2.bam``.

    The returned strings are joined and the combined length must equal
    107241.
    """
    bam_path = os.path.join(BAM_DATADIR, "ex2.bam")
    qualities = benchmark(build_query_qualities_with_samtoolspipe, bam_path)
    joined_qualities = "".join(qualities)
    assert len(joined_qualities) == 107241


def test_build_query_qualities_from_bam_with_pysam(benchmark):
    """Benchmark ``build_query_qualities_with_pysam`` on ``ex2.bam``.

    The lengths of the returned items are added up and the total must
    equal 107241.
    """
    bam_path = os.path.join(BAM_DATADIR, "ex2.bam")
    qualities = benchmark(build_query_qualities_with_pysam, bam_path)
    total_length = sum(len(column) for column in qualities)
    assert total_length == 107241


def test_build_query_names_from_bam_with_pysam(benchmark):
    """Benchmark ``build_query_names_with_pysam`` on ``ex2.bam``.

    Every string in every returned column is concatenated; the combined
    length must equal 2307343.
    """
    bam_path = os.path.join(BAM_DATADIR, "ex2.bam")
    columns = benchmark(build_query_names_with_pysam, bam_path)
    joined_names = "".join(name for column in columns for name in column)
    assert len(joined_names) == 2307343


def test_build_mapping_qualities_from_bam_with_samtoolspipe(benchmark):
    """Benchmark ``build_mapping_qualities_with_samtoolspipe`` on ``ex2.bam``.

    The returned strings are joined and the combined length must equal
    107241.
    """
    bam_path = os.path.join(BAM_DATADIR, "ex2.bam")
    qualities = benchmark(build_mapping_qualities_with_samtoolspipe, bam_path)
    joined_qualities = "".join(qualities)
    assert len(joined_qualities) == 107241


def test_build_mapping_qualities_from_bam_with_pysam(benchmark):
    """Benchmark ``build_mapping_qualities_with_pysam`` on ``ex2.bam``.

    The lengths of the returned items are added up and the total must
    equal 107241.
    """
    bam_path = os.path.join(BAM_DATADIR, "ex2.bam")
    qualities = benchmark(build_mapping_qualities_with_pysam, bam_path)
    total_length = sum(len(column) for column in qualities)
    assert total_length == 107241


def test_build_query_positions_from_bam_with_samtoolspipe(benchmark):
    """Benchmark ``build_query_positions_with_samtoolspipe`` on ``ex2.bam``.

    For each returned column the sum of its values minus its length is
    taken; those per-column figures must add up to 1841699.
    """
    bam_path = os.path.join(BAM_DATADIR, "ex2.bam")
    columns = benchmark(build_query_positions_with_samtoolspipe, bam_path)
    # Positions output by samtools are 1-based, so one is subtracted
    # per entry (i.e. len(column) per column) before comparing.
    zero_based_total = sum(sum(column) - len(column) for column in columns)
    assert zero_based_total == 1841699


def test_build_query_positions_from_bam_with_pysam(benchmark):
    """Benchmark ``build_query_positions_with_pysam`` on ``ex2.bam``.

    The values of every returned column are summed, and the grand total
    must equal 1841699.
    """
    bam_path = os.path.join(BAM_DATADIR, "ex2.bam")
    columns = benchmark(build_query_positions_with_pysam, bam_path)
    position_total = sum(sum(column) for column in columns)
    assert position_total == 1841699