File: test_utils.py

import json
import numpy as np
from pathlib import Path
from unittest.mock import patch
import pandas as pd

import pytest

from augur import utils


class TestUtils:
    @pytest.mark.parametrize("extension", ["bed","BED"])
    @patch('augur.utils.read_bed_file')
    def test_load_mask_sites_recognizes_bed_file(self, m_read_bed_file, extension):
        """load_mask_sites should handle files that end with .bed with any capitalization as a bed"""
        m_read_bed_file.return_value = [3,4]
        assert utils.load_mask_sites("mask.%s" % extension) == [3,4]
        m_read_bed_file.assert_called_with("mask.%s" % extension)

    @patch('augur.utils.read_mask_file')
    def test_load_mask_sites_recognizes_non_bed_file(self, m_read_mask_file):
        """load_mask_sites should pass any other files to read_mask_file"""
        m_read_mask_file.return_value = [5,6]
        assert utils.load_mask_sites("mask.not_a_bed")
        m_read_mask_file.assert_called_with("mask.not_a_bed")

    def test_read_mask_file_good_input(self, tmpdir):
        """read_mask_file should return a sorted list of unique sites from good mask files"""
        mask_sites = [15,200,34,200,36]
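        # positions in mask files are 1-based; read_mask_file returns them
        # 0-based, deduplicated, and sorted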
        expected_sites = sorted(set(i - 1 for i in mask_sites))
        mask_file = str(tmpdir / "temp.mask")
        with open(mask_file, "w") as fh:
            fh.write("\n".join(str(i) for i in mask_sites))
        assert utils.read_mask_file(mask_file) == expected_sites

    def test_read_mask_file_bad_lines(self, tmpdir):
        """read_mask_file should fail on bad lines in mask files"""
        mask_file = str(tmpdir / "temp.mask")
        bad_mask_sites = ["1", "#comment", "2"]
        with open(mask_file, "w") as fh:
            fh.write("\n".join(bad_mask_sites))
        with pytest.raises(ValueError):
            utils.read_mask_file(mask_file)

    def test_read_bed_file_good_input(self, tmpdir):
        """read_bed_file should read site ranges from properly formatted bed files"""
        bed_file = str(tmpdir / "temp.bed")
        bed_lines = ["SEQ\t7\t8", "SEQ\t2\t6", "SEQ\t3\t4"]
        expected_sites = [2,3,4,5,7]
        with open(bed_file, "w") as fh:
            fh.write("\n".join(bed_lines))
        assert utils.read_bed_file(bed_file) == expected_sites

    def test_read_bed_file_with_header(self, tmpdir):
        """read_bed_file should skip header lines if they exist in bed files"""
        bed_file = str(tmpdir / "temp.bed")
        bed_lines = ["CHROM\tSTART\tEND","SEQ\t7\t8", "SEQ\t2\t5"]
        expected_sites = [2,3,4,7]
        with open(bed_file, "w") as fh:
            fh.write("\n".join(bed_lines))
        assert utils.read_bed_file(bed_file) == expected_sites

    def test_read_bed_file_with_bad_lines(self, tmpdir):
        """read_bed_file should error out if any other lines are unreadable"""
        bed_file = str(tmpdir / "temp.bed")
        bed_lines = ["SEQ\t7\t8", "CHROM\tSTART\tEND", "SEQ\t2\t5"]
        with open(bed_file, "w") as fh:
            fh.write("\n".join(bed_lines))
        with pytest.raises(Exception):
            utils.read_bed_file(bed_file)

    def test_read_mask_file_drm_file(self, tmpdir):
        """read_mask_file should handle drm files as well"""
        drm_file = str(tmpdir / "temp.drm")
        drm_lines = ["SEQ\t5", "SEQ\t7"]
        expected_sites = [4,6]
        with open(drm_file, "w") as fh:
            fh.write("\n".join(drm_lines))
        assert utils.read_mask_file(drm_file) == expected_sites

    def test_write_json_data_types(self, tmpdir):
        """write_json should be able to serialize various data types."""
        data = {
            'int': np.int64(1),
            'float': np.float64(2.0),
            'array': np.array([3,4,5]),
            'series': pd.Series([6,7,8])
        }
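        # NumPy integers, arrays, and pandas Series are not handled by the
        # stdlib json encoder, so write_json must convert them to plain Python types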
        file = Path(tmpdir) / Path("data.json")
        utils.write_json(data, file, include_version=False)
        with open(file) as f:
            assert json.load(f) == {
                'int': 1,
                'float': 2.0,
                'array': [3,4,5],
                'series': [6,7,8]
            }