File: test_pbdataset_utils.py

package info (click to toggle)
python-pbcore 1.7.1%2Bgit20200430.a127b1e%2Bdfsg-1
  • links: PTS, VCS
  • area: main
  • in suites: bullseye
  • size: 5,404 kB
  • sloc: python: 23,243; xml: 2,504; makefile: 232; sh: 66
file content (113 lines) | stat: -rw-r--r-- 3,916 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
import logging
import tempfile

from pbcore.io.dataset.DataSetMetaTypes import dsIdToSuffix
from pbcore.io import (DataSetMetaTypes, divideKeys,
                       SubreadSet, getDataSetUuid, getDataSetMetaType)

import pbcore.data as upstreamdata

# Module-level logger named after this module; none of the code shown in
# this file calls it.
log = logging.getLogger(__name__)


def keysToRanges(keys):
    """Reduce each group of keys to its ``[smallest, largest]`` pair.

    ``keys`` is an iterable of groups; the result is a list holding one
    two-element list per group, in the order the groups were given.
    An empty ``keys`` yields an empty list.
    """
    bounds = []
    for group in keys:
        bounds.append([min(group), max(group)])
    return bounds


class TestDataSetUtils:
    """Unit and integration tests for the DataSet class and
    associated module functions."""

    def test_get_dataset_uuid(self):
        """Check getDataSetUuid on a written dataset and on a junk file.

        The UUID read back from the written XML must equal ``ds.uuid``;
        after the file is overwritten with plain text the result must be
        ``None``.
        """
        ds = SubreadSet(upstreamdata.getUnalignedBam(), strict=True)
        # Keep the temporary file object alive for the whole test. Taking
        # ``.name`` from a discarded NamedTemporaryFile lets the file be
        # deleted straight away, so whatever is later written to that path
        # is never cleaned up. Re-opening the path by name while the handle
        # is still open relies on Unix semantics.
        with tempfile.NamedTemporaryFile(suffix=".subreadset.xml") as tmp:
            ds_file = tmp.name
            ds.write(ds_file)
            assert getDataSetUuid(ds_file) == ds.uuid
            # Replace the dataset XML with text that is not a dataset.
            with open(ds_file, "w") as out:
                out.write("hello world!")
            assert getDataSetUuid(ds_file) is None

    def test_get_dataset_metatype(self):
        """Check getDataSetMetaType on a SubreadSet written to disk."""
        ds = SubreadSet(upstreamdata.getUnalignedBam(), strict=True)
        # Same temp-file handling as above: the context manager keeps the
        # path reserved during the test and removes the file afterwards.
        with tempfile.NamedTemporaryFile(suffix=".subreadset.xml") as tmp:
            ds_file = tmp.name
            ds.write(ds_file)
            assert getDataSetMetaType(ds_file) == "PacBio.DataSet.SubreadSet"

    def test_dsIdToSuffix(self):
        """Check dsIdToSuffix against the expected file suffixes, in order."""
        suffixes = ['subreadset.xml', 'alignmentset.xml',
                    'barcodeset.xml', 'consensusreadset.xml',
                    'consensusalignmentset.xml',
                    'referenceset.xml', 'contigset.xml']
        # NOTE(review): zip stops at the shorter sequence, so if
        # DataSetMetaTypes.ALL holds more entries than ``suffixes`` the
        # extra ones are not checked here - confirm that is intended.
        for dsId, exp in zip(DataSetMetaTypes.ALL, suffixes):
            assert dsIdToSuffix(dsId) == exp

    @staticmethod
    def _assert_divisions(keys, cases):
        """Assert divideKeys/keysToRanges results for each case.

        ``cases`` is a sequence of ``(n, expected_chunks, expected_ranges)``
        tuples: ``divideKeys(keys, n)`` must equal ``expected_chunks`` and
        ``keysToRanges`` of that result must equal ``expected_ranges``.
        """
        for n_chunks, expected_chunks, expected_ranges in cases:
            chunks = divideKeys(keys, n_chunks)
            assert chunks == expected_chunks
            assert keysToRanges(chunks) == expected_ranges

    def test_divideKeys_keysToRanges(self):
        """Check divideKeys and keysToRanges on unique and repeated keys."""
        # Keys without repeats.
        one_per_chunk = [[0], [1], [2], [3], [5], [8], [50]]
        one_per_chunk_ranges = [[0, 0], [1, 1], [2, 2], [3, 3],
                                [5, 5], [8, 8], [50, 50]]
        self._assert_divisions(
            [0, 1, 2, 3, 5, 8, 50],
            [
                (0, [], []),
                (1, [[0, 1, 2, 3, 5, 8, 50]], [[0, 50]]),
                (2, [[0, 1, 2], [3, 5, 8, 50]], [[0, 2], [3, 50]]),
                (3, [[0, 1], [2, 3], [5, 8, 50]],
                 [[0, 1], [2, 3], [5, 50]]),
                (7, one_per_chunk, one_per_chunk_ranges),
                # Asking for more chunks than keys gives the same result
                # as one key per chunk.
                (8, one_per_chunk, one_per_chunk_ranges),
            ],
        )

        # Keys with repeated values (2 and 50 appear twice).
        one_per_chunk = [[0], [1], [2], [2], [3], [5], [8], [50], [50]]
        one_per_chunk_ranges = [[0, 0], [1, 1], [2, 2], [2, 2], [3, 3],
                                [5, 5], [8, 8], [50, 50], [50, 50]]
        self._assert_divisions(
            [0, 1, 2, 2, 3, 5, 8, 50, 50],
            [
                (0, [], []),
                (1, [[0, 1, 2, 2, 3, 5, 8, 50, 50]], [[0, 50]]),
                (2, [[0, 1, 2, 2], [3, 5, 8, 50, 50]], [[0, 2], [3, 50]]),
                (3, [[0, 1, 2], [2, 3, 5], [8, 50, 50]],
                 [[0, 2], [2, 5], [8, 50]]),
                (9, one_per_chunk, one_per_chunk_ranges),
                (10, one_per_chunk, one_per_chunk_ranges),
            ],
        )