1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180
|
import gzip
import os
import pysam
from TestUtils import TABIX_DATADIR
# File names of the BED fixtures; the tests below join them onto TABIX_DATADIR.
# Small pair: the tests using these assert a count of 164 records.
FN_COMPRESSED = "example.bed.gz"
FN_UNCOMPRESSED = "example.bed"
# Large pair: the tests using these assert a count of 100000 records.
FN_LARGE_COMPRESSED = "example_large.bed.gz"
FN_LARGE_UNCOMPRESSED = "example_large.bed"
def read_python_compressed(fn):
    '''Count the lines of gzip file *fn* using only the standard library.

    Every line is split on tabs (as bytes) so that the measured work
    includes field separation, not just decompression.
    '''
    with gzip.open(fn, mode="r") as handle:
        split_rows = [row.split(b"\t") for row in handle]
    return len(split_rows)
def read_python_uncompressed(fn):
    '''Count the lines of text file *fn* using only the standard library.

    Every line is split on tabs so that the measured work includes
    field separation, not just reading.
    '''
    with open(fn) as handle:
        split_rows = [row.split("\t") for row in handle]
    return len(split_rows)
def fetch_plain(fn):
    '''Count the records yielded by an unrestricted fetch() on tabix file *fn*.

    No parser is passed to fetch(), so the records are whatever pysam
    yields by default.
    '''
    # TabixFile is the current class name; Tabixfile is its deprecated alias.
    with pysam.TabixFile(fn) as tabix:
        return len(list(tabix.fetch()))
def fetch_parsed(fn):
    '''Count the records yielded by fetch() on tabix file *fn* with a BED parser.

    Identical to an unrestricted fetch() except that each record is
    passed through pysam.asBed().
    '''
    # TabixFile is the current class name; Tabixfile is its deprecated alias.
    with pysam.TabixFile(fn) as tabix:
        return len(list(tabix.fetch(parser=pysam.asBed())))
def iterate_generic_compressed(fn):
    '''Count BED records read from gzip file *fn* via tabix_generic_iterator.'''
    with gzip.open(fn) as stream:
        records = pysam.tabix_generic_iterator(stream, parser=pysam.asBed())
        # Exhaust the iterator while the file is still open.
        return len(list(records))
def iterate_generic_uncompressed(fn):
    '''Count BED records read from plain file *fn* via tabix_generic_iterator.'''
    with open(fn) as stream:
        records = pysam.tabix_generic_iterator(stream, parser=pysam.asBed())
        # Exhaust the iterator while the file is still open.
        return len(list(records))
def iterate_parsed_compressed(fn):
    '''Count BED records read from gzip file *fn* via tabix_iterator.'''
    with gzip.open(fn) as stream:
        records = pysam.tabix_iterator(stream, parser=pysam.asBed())
        # Exhaust the iterator while the file is still open.
        return len(list(records))
def iterate_parsed_uncompressed(fn):
    '''Count BED records read from plain file *fn* via tabix_iterator.'''
    with open(fn) as stream:
        records = pysam.tabix_iterator(stream, parser=pysam.asBed())
        # Exhaust the iterator while the file is still open.
        return len(list(records))
def iterate_file_compressed(fn):
    '''Count BED records read from gzip file *fn* via tabix_file_iterator.'''
    with gzip.open(fn) as stream:
        records = pysam.tabix_file_iterator(stream, parser=pysam.asBed())
        # Exhaust the iterator while the file is still open.
        return len(list(records))
def iterate_file_uncompressed(fn):
    '''Count BED records read from plain file *fn* via tabix_file_iterator.'''
    with open(fn) as stream:
        records = pysam.tabix_file_iterator(stream, parser=pysam.asBed())
        # Exhaust the iterator while the file is still open.
        return len(list(records))
def test_read_python_compressed(benchmark):
    '''Benchmark read_python_compressed on the small compressed BED file.'''
    path = os.path.join(TABIX_DATADIR, FN_COMPRESSED)
    n_records = benchmark(read_python_compressed, path)
    assert n_records == 164
def test_read_python_uncompressed(benchmark):
    '''Benchmark read_python_uncompressed on the small plain BED file.'''
    path = os.path.join(TABIX_DATADIR, FN_UNCOMPRESSED)
    n_records = benchmark(read_python_uncompressed, path)
    assert n_records == 164
def test_fetch_plain(benchmark):
    '''Benchmark fetch_plain on the small compressed BED file.'''
    path = os.path.join(TABIX_DATADIR, FN_COMPRESSED)
    n_records = benchmark(fetch_plain, path)
    assert n_records == 164
def test_fetch_parsed(benchmark):
    '''Benchmark fetch_parsed on the small compressed BED file.'''
    path = os.path.join(TABIX_DATADIR, FN_COMPRESSED)
    n_records = benchmark(fetch_parsed, path)
    assert n_records == 164
def test_iterate_generic_compressed(benchmark):
    '''Benchmark iterate_generic_compressed on the small compressed BED file.'''
    path = os.path.join(TABIX_DATADIR, FN_COMPRESSED)
    n_records = benchmark(iterate_generic_compressed, path)
    assert n_records == 164
def test_iterate_generic_uncompressed(benchmark):
    '''Benchmark iterate_generic_uncompressed on the small plain BED file.'''
    path = os.path.join(TABIX_DATADIR, FN_UNCOMPRESSED)
    n_records = benchmark(iterate_generic_uncompressed, path)
    assert n_records == 164
def test_iterate_parsed_compressed(benchmark):
    '''Benchmark iterate_parsed_compressed on the small compressed BED file.'''
    path = os.path.join(TABIX_DATADIR, FN_COMPRESSED)
    n_records = benchmark(iterate_parsed_compressed, path)
    assert n_records == 164
def test_iterate_parsed_uncompressed(benchmark):
    '''Benchmark iterate_parsed_uncompressed on the small plain BED file.'''
    path = os.path.join(TABIX_DATADIR, FN_UNCOMPRESSED)
    n_records = benchmark(iterate_parsed_uncompressed, path)
    assert n_records == 164
def test_iterate_file_compressed(benchmark):
    '''Benchmark iterate_file_compressed on the small compressed BED file.'''
    path = os.path.join(TABIX_DATADIR, FN_COMPRESSED)
    n_records = benchmark(iterate_file_compressed, path)
    assert n_records == 164
def test_iterate_file_uncompressed(benchmark):
    '''Benchmark iterate_file_uncompressed on the small plain BED file.'''
    path = os.path.join(TABIX_DATADIR, FN_UNCOMPRESSED)
    n_records = benchmark(iterate_file_uncompressed, path)
    assert n_records == 164
def test_read_python_large_compressed(benchmark):
    '''Benchmark read_python_compressed on the large compressed BED file.'''
    path = os.path.join(TABIX_DATADIR, FN_LARGE_COMPRESSED)
    n_records = benchmark(read_python_compressed, path)
    assert n_records == 100000
def test_read_python_large_uncompressed(benchmark):
    '''Benchmark read_python_uncompressed on the large plain BED file.'''
    path = os.path.join(TABIX_DATADIR, FN_LARGE_UNCOMPRESSED)
    n_records = benchmark(read_python_uncompressed, path)
    assert n_records == 100000
def test_fetch_large_plain(benchmark):
    '''Benchmark fetch_plain on the large compressed BED file.

    The name carries ``_large_`` so it does not rebind the small-file
    ``test_fetch_plain`` defined earlier in this module; with identical
    names only the later definition would be collected by pytest.
    '''
    path = os.path.join(TABIX_DATADIR, FN_LARGE_COMPRESSED)
    n_records = benchmark(fetch_plain, path)
    assert n_records == 100000
def test_fetch_large_parsed(benchmark):
    '''Benchmark fetch_parsed on the large compressed BED file.

    The name carries ``_large_`` so it does not rebind the small-file
    ``test_fetch_parsed`` defined earlier in this module; with identical
    names only the later definition would be collected by pytest.
    '''
    path = os.path.join(TABIX_DATADIR, FN_LARGE_COMPRESSED)
    n_records = benchmark(fetch_parsed, path)
    assert n_records == 100000
def test_iterate_generic_large_compressed(benchmark):
    '''Benchmark iterate_generic_compressed on the large compressed BED file.'''
    path = os.path.join(TABIX_DATADIR, FN_LARGE_COMPRESSED)
    n_records = benchmark(iterate_generic_compressed, path)
    assert n_records == 100000
def test_iterate_generic_large_uncompressed(benchmark):
    '''Benchmark iterate_generic_uncompressed on the large plain BED file.'''
    path = os.path.join(TABIX_DATADIR, FN_LARGE_UNCOMPRESSED)
    n_records = benchmark(iterate_generic_uncompressed, path)
    assert n_records == 100000
def test_iterate_parsed_large_compressed(benchmark):
    '''Benchmark iterate_parsed_compressed on the large compressed BED file.'''
    path = os.path.join(TABIX_DATADIR, FN_LARGE_COMPRESSED)
    n_records = benchmark(iterate_parsed_compressed, path)
    assert n_records == 100000
def test_iterate_parsed_large_uncompressed(benchmark):
    '''Benchmark iterate_parsed_uncompressed on the large plain BED file.'''
    path = os.path.join(TABIX_DATADIR, FN_LARGE_UNCOMPRESSED)
    n_records = benchmark(iterate_parsed_uncompressed, path)
    assert n_records == 100000
def test_iterate_file_large_compressed(benchmark):
    '''Benchmark iterate_file_compressed on the large compressed BED file.'''
    path = os.path.join(TABIX_DATADIR, FN_LARGE_COMPRESSED)
    n_records = benchmark(iterate_file_compressed, path)
    assert n_records == 100000
def test_iterate_file_large_uncompressed(benchmark):
    '''Benchmark iterate_file_uncompressed on the large plain BED file.'''
    path = os.path.join(TABIX_DATADIR, FN_LARGE_UNCOMPRESSED)
    n_records = benchmark(iterate_file_uncompressed, path)
    assert n_records == 100000
|