File: test_basic_regression.py

package info (click to toggle)
propka 3.5.1-2
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 1,888 kB
  • sloc: python: 7,156; makefile: 39
file content (172 lines) | stat: -rw-r--r-- 5,469 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
"""Tests for PROPKA"""
import logging
import os
import re
import json
from pathlib import Path
import pytest
from pytest import approx
from propka.parameters import Parameters
from propka.molecular_container import MolecularContainer
from propka.input import read_parameter_file, read_molecule_file
from propka.lib import loadOptions
from typing import List


# Module-level logger, named after this module.
_LOGGER = logging.getLogger(__name__)


# Number of decimal places for maximum tolerable error.  Set by number of
# decimal places in pKa output as well as need to make unmodified code work
# on WSL Ubuntu 18.04
MAX_ERR_DECIMALS = 2
# Absolute tolerance derived from MAX_ERR_DECIMALS (10**-2, i.e. 0.01).
MAX_ERR_ABS = 10**-MAX_ERR_DECIMALS


# Directory holding the tests; a relative path, so it is interpreted against
# the current working directory.
TEST_DIR = Path("tests")
# Location for test PDBs
PDB_DIR = Path("pdb")
# Location for results for comparing output (allow running from tests/ and
# ../tests/)
RESULTS_DIR = Path("tests/results")
# Fall back to the bare "results" directory when "tests/results" is not a
# directory as seen from the current working directory.
if not RESULTS_DIR.is_dir():
    _LOGGER.warning("Switching to sub-directory")
    RESULTS_DIR = Path("results")


def get_test_dirs():
    """Get locations of test files.

    Each directory is looked up first beneath ``TEST_DIR`` and then as given
    (both relative to the current working directory); the first candidate
    that is an existing directory wins.

    Returns:
        dictionary with test file locations, keyed by "pdbs" and "results".
    Raises:
        FileNotFoundError: if neither candidate for a key is a directory.
    """
    path_dict = {}
    for key, path in [("pdbs", PDB_DIR), ("results", RESULTS_DIR)]:
        candidates = [TEST_DIR / path, path]
        for candidate in candidates:
            if candidate.is_dir():
                path_dict[key] = candidate
                break
        else:
            # Convert each candidate explicitly: formatting a list (or a Path)
            # with the ":s" spec raises TypeError, which used to hide the
            # FileNotFoundError this branch is meant to raise.
            searched = ", ".join(str(candidate) for candidate in candidates)
            raise FileNotFoundError(
                f"Can't find {key} test files in {searched}")
    return path_dict


def run_propka(options, pdb_path, tmp_path):
    """Run PROPKA software.

    The calculation is performed with ``tmp_path`` as the working directory;
    the previous working directory is restored afterwards, even if the run
    raises.

    Args:
        options:  list of PROPKA options (not modified by this function)
        pdb_path:  path to PDB file
        tmp_path:  path for working directory
    """
    # Build a new list instead of ``options += [...]``: the in-place form
    # mutates the caller's list, which for parametrized tests is a shared
    # object and would accumulate PDB paths across repeated runs.
    options = [*options, str(pdb_path)]
    args = loadOptions(options)
    cwd = Path.cwd()
    try:
        _LOGGER.warning(
            "Working in tmpdir {0:s} because of PROPKA file output; "
            "need to fix this.".format(str(tmp_path)))
        os.chdir(tmp_path)
        parameters = read_parameter_file(args.parameters, Parameters())
        molecule = MolecularContainer(parameters, args)
        molecule = read_molecule_file(str(pdb_path), molecule)
        molecule.calculate_pka()
        molecule.write_pka()
    finally:
        # Always return to the original directory so later tests are not
        # affected by the chdir above.
        os.chdir(cwd)

def parse_pka(pka_path: Path) -> dict:
    """Extract the comparable numbers from a .pka file.

    The file is scanned line by line.  A line containing "model-pKa" opens a
    pKa table; every following line contributes the first decimal number
    found after column 13, until a line starting with "---" closes the table.
    Outside a table, a line of the form "The pI is X (folded) and Y
    (unfolded)" supplies the isoelectric points.

    Args:
        pka_path:  path to the .pka file to read
    Returns:
        dictionary with a "pKa" list of floats and, when the pI line is
        present, "pI_folded" and "pI_unfolded" floats.
    """
    pka_values: List[float] = []
    data: dict = {"pKa": pka_values}
    inside_table = False

    with open(pka_path, "rt") as handle:
        for line in handle:
            if inside_table:
                if line.startswith("---"):
                    # Separator line terminates the pKa table.
                    inside_table = False
                    continue
                # Skip the leading residue columns before looking for the
                # value.
                found = re.search(r'\d+\.\d+', line[13:])
                assert found is not None
                pka_values.append(float(found.group()))
                continue

            if "model-pKa" in line:
                # Table header: the rows that follow carry pKa values.
                inside_table = True
                continue

            found = re.match(
                r"The pI is *(\d+\.\d+) .folded. and *(\d+\.\d+) .unfolded.",
                line)
            if found is None:
                continue
            data["pI_folded"] = float(found.group(1))
            data["pI_unfolded"] = float(found.group(2))

    return data


def compare_output(pdb, tmp_path, ref_path):
    """Check freshly generated results against the stored reference.

    A reference whose name ends in ".json" is loaded as JSON; any other
    reference is read as one float per line and treated as the "pKa" list.
    Every key of the reference is compared with the value parsed from
    ``<tmp_path>/<pdb>.pka`` using an absolute tolerance of ``MAX_ERR_ABS``.

    Args:
        pdb:  PDB filename stem
        tmp_path:  temporary directory containing the generated .pka file
        ref_path:  path with reference results
    Raises:
        AssertionError if results disagree (the failing key is the message).
    """
    with open(ref_path, "rt") as ref_file:
        if not ref_path.name.endswith(".json"):
            # Plain-text reference: a bare column of pKa values.
            expected = {"pKa": [float(row) for row in ref_file]}
        else:
            expected = json.load(ref_file)

    pka_file = tmp_path / f"{pdb}.pka"
    actual = parse_pka(pka_file)

    for key in expected:
        assert actual[key] == approx(expected[key], abs=MAX_ERR_ABS), key


@pytest.mark.parametrize("pdb, options", [
    pytest.param('sample-issue-140', [], id="sample-issue-140: no options"),
    pytest.param("1FTJ-Chain-A", [], id="1FTJ-Chain-A: no options"),
    pytest.param('1HPX', [], id="1HPX: no options"),
    pytest.param('4DFR', [], id="4DFR: no options"),
    pytest.param('3SGB', [], id="3SGB: no options"),
    pytest.param('3SGB-subset', [
        "--titrate_only",
        "E:17,E:18,E:19,E:29,E:44,E:45,E:46,E:118,E:119,E:120,E:139"],
                 id="3SGB: --titrate_only"),
    pytest.param('1HPX-warn', ['--quiet'], id="1HPX-warn: --quiet")])
def test_regression(pdb, options, tmp_path):
    """Basic regression test of PROPKA functionality.

    Runs PROPKA on ``<pdb>.pdb`` and, when a reference file ``<pdb>.json`` or
    ``<pdb>.dat`` exists in the results directory, compares the output with
    it.  A missing reference only produces a warning; the run itself is still
    executed.

    Args:
        pdb:  PDB filename stem
        options:  list of PROPKA options
        tmp_path:  temporary working directory for the run
    Raises:
        FileNotFoundError: if the input PDB file does not exist.
    """
    path_dict = get_test_dirs()
    ref_path = None

    # Prefer a JSON reference over the plain .dat one.
    for ext in ["json", "dat"]:
        ref_path = path_dict["results"] / f"{pdb}.{ext}"
        if ref_path.is_file():
            ref_path = ref_path.resolve()
            break
    else:
        _LOGGER.warning("Missing results file for comparison: {0:s}".format(
            str(ref_path)))
        ref_path = None

    pdb_path = path_dict["pdbs"] / f"{pdb}.pdb"
    if not pdb_path.is_file():
        # An f-string is used because formatting a Path with the ":s" spec
        # raises TypeError, which would mask this FileNotFoundError.
        raise FileNotFoundError(f"Missing PDB file: {pdb_path}")
    # Resolve to absolute paths: run_propka changes the working directory.
    pdb_path = pdb_path.resolve()
    tmp_path = Path(tmp_path).resolve()

    # Hand over a copy so the parametrized options list is never shared with
    # code that might modify it.
    run_propka(list(options), pdb_path, tmp_path)
    if ref_path is not None:
        compare_output(pdb, tmp_path, ref_path)