1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172
|
"""Tests for PROPKA"""
import logging
import os
import re
import json
from pathlib import Path
import pytest
from pytest import approx
from propka.parameters import Parameters
from propka.molecular_container import MolecularContainer
from propka.input import read_parameter_file, read_molecule_file
from propka.lib import loadOptions
from typing import List
_LOGGER = logging.getLogger(__name__)

# Largest tolerable error, expressed as a number of decimal places. The value
# follows the precision of the pKa output and the need to keep unmodified
# code passing on WSL Ubuntu 18.04.
MAX_ERR_DECIMALS = 2
MAX_ERR_ABS = 10 ** (-MAX_ERR_DECIMALS)

# Directory holding the tests
TEST_DIR = Path("tests")
# Directory holding the test PDBs
PDB_DIR = Path("pdb")
# Directory holding the reference results used for comparison. Fall back to
# the plain sub-directory name when "tests/results" is not visible from the
# current working directory (allow running from tests/ and ../tests/).
RESULTS_DIR = Path("tests/results")
if not RESULTS_DIR.is_dir():
    _LOGGER.warning("Switching to sub-directory")
    RESULTS_DIR = Path("results")
def get_test_dirs():
    """Get locations of test files.

    Each directory is looked up first beneath ``TEST_DIR`` and then relative
    to the current working directory; the first existing candidate wins.

    Returns:
        dictionary mapping "pdbs" and "results" to the directories found.

    Raises:
        FileNotFoundError: if a directory exists in neither location.
    """
    path_dict = {}
    for key, path in [("pdbs", PDB_DIR), ("results", RESULTS_DIR)]:
        candidates = [TEST_DIR / path, path]
        for test_path in candidates:
            if test_path.is_dir():
                path_dict[key] = test_path
                break
        else:
            # Convert the candidates with str(): applying the ":s" format
            # spec to a list raises TypeError, which would hide the intended
            # FileNotFoundError.
            searched = ", ".join(str(candidate) for candidate in candidates)
            raise FileNotFoundError(
                f"Can't find {key} test files in {searched}")
    return path_dict
def run_propka(options, pdb_path, tmp_path):
    """Run PROPKA software.

    The calculation is performed with ``tmp_path`` as the current working
    directory; the previous working directory is restored afterwards, even if
    the run fails.

    Args:
        options: list of PROPKA options (not modified by this function)
        pdb_path: path to PDB file; it is used after the directory change, so
            a relative path would be interpreted relative to ``tmp_path``
        tmp_path: path for working directory
    """
    # Build a new list instead of using "+=", which would extend the caller's
    # list in place (e.g. the lists shared through pytest parametrization).
    args = loadOptions([*options, str(pdb_path)])
    cwd = Path.cwd()
    try:
        _LOGGER.warning(
            "Working in tmpdir {0:s} because of PROPKA file output; "
            "need to fix this.".format(str(tmp_path)))
        os.chdir(tmp_path)
        parameters = read_parameter_file(args.parameters, Parameters())
        molecule = MolecularContainer(parameters, args)
        molecule = read_molecule_file(str(pdb_path), molecule)
        molecule.calculate_pka()
        molecule.write_pka()
    finally:
        # Always return to the original directory so later tests are not
        # affected by the temporary change.
        os.chdir(cwd)
def parse_pka(pka_path: Path) -> dict:
    """Extract the testable values from a .pka file.

    Lines between a header containing "model-pKa" and the next line starting
    with "---" each contribute one pKa: the first decimal number found after
    column 13. Outside that table, a "The pI is ..." line supplies the folded
    and unfolded pI values.

    Args:
        pka_path: path to the .pka file

    Returns:
        dictionary with a "pKa" list and, when the pI line is present,
        "pI_folded" and "pI_unfolded" entries.
    """
    values: List[float] = []
    parsed: dict = {"pKa": values}
    in_table = False
    with open(pka_path, "rt") as handle:
        for row in handle:
            if in_table:
                if row.startswith("---"):
                    # End of the pKa table
                    in_table = False
                    continue
                hit = re.search(r'\d+\.\d+', row[13:])
                assert hit is not None
                values.append(float(hit.group()))
                continue
            if "model-pKa" in row:
                # Table header: the following lines hold pKa values
                in_table = True
                continue
            pi_hit = re.match(
                r"The pI is *(\d+\.\d+) .folded. and *(\d+\.\d+) .unfolded.",
                row)
            if pi_hit is not None:
                folded, unfolded = pi_hit.groups()
                parsed["pI_folded"] = float(folded)
                parsed["pI_unfolded"] = float(unfolded)
    return parsed
def compare_output(pdb, tmp_path, ref_path):
    """Compare results of test with reference.

    Args:
        pdb: PDB filename stem
        tmp_path: temporary directory containing the generated ``<pdb>.pka``
        ref_path: path with reference results; a ".json" file is loaded as a
            dictionary, any other file is read as one pKa value per line

    Raises:
        AssertionError: if a reference entry is missing from the parsed
            output or differs from it by more than MAX_ERR_ABS.
    """
    with open(ref_path, "rt") as ref_file:
        if ref_path.name.endswith(".json"):
            ref_data = json.load(ref_file)
        else:
            # Ignore blank lines (such as a trailing empty line), which
            # float() would otherwise reject with a ValueError.
            ref_data = {
                "pKa": [float(line) for line in ref_file if line.strip()]}
    test_data = parse_pka(tmp_path / f"{pdb}.pka")
    for key, expected in ref_data.items():
        # Report a missing entry as a test failure naming the key rather
        # than as a bare KeyError.
        assert key in test_data, key
        assert test_data[key] == approx(expected, abs=MAX_ERR_ABS), key
@pytest.mark.parametrize("pdb, options", [
    pytest.param('sample-issue-140', [], id="sample-issue-140: no options"),
    pytest.param("1FTJ-Chain-A", [], id="1FTJ-Chain-A: no options"),
    pytest.param('1HPX', [], id="1HPX: no options"),
    pytest.param('4DFR', [], id="4DFR: no options"),
    pytest.param('3SGB', [], id="3SGB: no options"),
    pytest.param('3SGB-subset', [
        "--titrate_only",
        "E:17,E:18,E:19,E:29,E:44,E:45,E:46,E:118,E:119,E:120,E:139"],
        id="3SGB: --titrate_only"),
    pytest.param('1HPX-warn', ['--quiet'], id="1HPX-warn: --quiet")])
def test_regression(pdb, options, tmp_path):
    """Basic regression test of PROPKA functionality.

    Runs PROPKA on ``<pdb>.pdb`` inside ``tmp_path`` and, when a reference
    file ``<pdb>.json`` or ``<pdb>.dat`` exists in the results directory,
    compares the generated output with it. Without a reference file only a
    warning is logged and the comparison is skipped.

    Args:
        pdb: PDB filename stem
        options: list of PROPKA options
        tmp_path: temporary directory used as the working directory

    Raises:
        FileNotFoundError: if the PDB input file is missing.
    """
    path_dict = get_test_dirs()
    # Prefer a JSON reference and fall back to the .dat format
    for ext in ["json", "dat"]:
        ref_path = path_dict["results"] / f"{pdb}.{ext}"
        if ref_path.is_file():
            ref_path = ref_path.resolve()
            break
    else:
        _LOGGER.warning("Missing results file for comparison: {0:s}".format(
            str(ref_path)))
        ref_path = None
    pdb_path = path_dict["pdbs"] / ("{0:s}.pdb".format(pdb))
    if not pdb_path.is_file():
        # Path objects reject the ":s" format spec with a TypeError, so the
        # message is built with an f-string to keep the intended error.
        raise FileNotFoundError(f"Missing PDB file: {pdb_path}")
    # Use absolute paths: run_propka changes the working directory
    pdb_path = pdb_path.resolve()
    tmp_path = Path(tmp_path).resolve()
    # Pass a copy so the parametrized option lists are never modified
    run_propka(list(options), pdb_path, tmp_path)
    if ref_path is not None:
        compare_output(pdb, tmp_path, ref_path)
|