1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275
|
# Copyright 2009-2011 by Eric Talevich. All rights reserved.
# Revisions copyright 2009-2013 by Peter Cock. All rights reserved.
# Revisions copyright 2013 Lenna X. Peterson. All rights reserved.
# Revisions copyright 2020 Joao Rodrigues. All rights reserved.
#
# Converted by Eric Talevich from an older unit test copyright 2002
# by Thomas Hamelryck.
#
# Merged related test files into one, by Joao Rodrigues (2020)
#
# This file is part of the Biopython distribution and governed by your
# choice of the "Biopython License Agreement" or the "BSD 3-Clause License".
# Please see the LICENSE file that should have been included as part of this
# package.
"""Unit tests for the Bio.PDB.DSSP submodule."""
import re
import subprocess
import unittest
import warnings
try:
import numpy as np # noqa: F401
except ImportError:
from Bio import MissingPythonDependencyError
raise MissingPythonDependencyError(
"Install NumPy if you want to use Bio.PDB."
) from None
from Bio.PDB import DSSP
from Bio.PDB import make_dssp_dict
from Bio.PDB import MMCIFParser
from Bio.PDB import PDBParser
# Minimum DSSP version, as a tuple, that the mmCIF tests below require;
# they skip themselves when the detected version compares lower than this.
VERSION_2_2_0 = (2, 2, 0)
def parse_dssp_version(version_string):
    """Parse the DSSP version into a tuple from the tool output.

    The first dotted run of integers found in ``version_string`` is taken as
    the version, e.g. ``"mkdssp version 4.0.4"`` gives ``(4, 0, 4)``.

    Returns ``(0, 0, 0)`` when the text holds no version number, so the
    result can always be compared against another version tuple.
    """
    # Every component must consist of digits: the looser "[\d.]+" pattern
    # also matches a lone "." or a trailing dot, and int("") then raises.
    match = re.search(r"\d+(?:\.\d+)*", version_string)
    if match is None:
        return (0, 0, 0)
    return tuple(int(part) for part in match.group(0).split("."))
def will_it_float(s):  # well played, whoever this was :)
    """Convert the input into a float if it is a number.

    Anything ``float()`` cannot handle - a non-numeric string, or an object
    such as ``None`` - is returned unchanged.
    """
    try:
        return float(s)
    except (TypeError, ValueError):
        # ValueError: text that is not a number.
        # TypeError: an object float() does not accept at all.
        return s
class DSSP_tool_test(unittest.TestCase):
    """Test calling DSSP from Bio.PDB."""

    @staticmethod
    def _probe_dssp_version(executable):
        """Return the version tuple reported by ``executable``.

        ``(0, 0, 0)`` is returned for an executable that runs but whose
        version cannot be determined.

        Raises OSError if the executable cannot be started, and
        subprocess.CalledProcessError if it rejects both ``--version``
        and ``-h``.
        """
        try:
            # Newer versions of DSSP
            version_string = subprocess.check_output(
                [executable, "--version"], text=True
            )
        except subprocess.CalledProcessError:
            # Older versions of DSSP
            subprocess.check_call(
                [executable, "-h"],
                stdout=subprocess.PIPE,
                stderr=subprocess.STDOUT,
            )
            return (0, 0, 0)
        # Fall back to the "unknown" version if nothing could be parsed, so
        # the tuple comparison in the mmCIF tests never sees a non-tuple.
        return parse_dssp_version(version_string) or (0, 0, 0)

    @classmethod
    def setUpClass(cls):
        """Locate a DSSP executable and create the parsers shared by tests.

        Raises unittest.SkipTest when neither ``dssp`` nor ``mkdssp`` can be
        launched.
        """
        cls.dssp_version = (0, 0, 0)
        is_dssp_available = False
        # Check if DSSP is installed, under either of its executable names.
        # Both names get the same probing sequence.
        for executable in ("dssp", "mkdssp"):
            try:
                cls.dssp_version = cls._probe_dssp_version(executable)
            except OSError:
                continue  # not launchable under this name, try the next
            is_dssp_available = True
            break
        if not is_dssp_available:
            raise unittest.SkipTest(
                "Install dssp if you want to use it from Biopython."
            )
        cls.pdbparser = PDBParser()
        cls.cifparser = MMCIFParser()

    def _require_mmcif_support(self):
        """Skip the running test unless DSSP is at least version 2.2.0."""
        if self.dssp_version < VERSION_2_2_0:
            self.skipTest("Test requires DSSP version 2.2.0 or greater")

    def test_dssp(self):
        """Test DSSP generation from PDB."""
        pdbfile = "PDB/2BEG.pdb"
        model = self.pdbparser.get_structure("2BEG", pdbfile)[0]
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")  # silence DSSP warnings
            dssp = DSSP(model, pdbfile)
        self.assertEqual(len(dssp), 130)

    # Only run mmCIF tests if DSSP version installed supports mmcif
    def test_dssp_with_mmcif_file(self):
        """Test DSSP generation from MMCIF."""
        self._require_mmcif_support()
        pdbfile = "PDB/4ZHL.cif"
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")  # silence all warnings
            model = self.cifparser.get_structure("4ZHL", pdbfile)[0]
            dssp = DSSP(model, pdbfile)
        self.assertEqual(len(dssp), 257)

    def test_dssp_with_mmcif_file_and_nonstandard_residues(self):
        """Test DSSP generation from MMCIF with non-standard residues."""
        self._require_mmcif_support()
        pdbfile = "PDB/1AS5.cif"
        model = self.cifparser.get_structure("1AS5", pdbfile)[0]
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")  # silence DSSP warnings
            dssp = DSSP(model, pdbfile)
        self.assertEqual(len(dssp), 24)

    def test_dssp_with_mmcif_file_and_different_chain_ids(self):
        """Test DSSP generation from MMCIF which has different label and author chain IDs."""
        self._require_mmcif_support()
        pdbfile = "PDB/1A7G.cif"
        model = self.cifparser.get_structure("1A7G", pdbfile)[0]
        dssp = DSSP(model, pdbfile)
        self.assertEqual(len(dssp), 82)
        # Each key starts with a chain ID; the first one is expected to be "E".
        self.assertEqual(dssp.keys()[0][0], "E")
class DSSP_test(unittest.TestCase):
    """Tests for DSSP parsing etc which don't need the binary tool."""

    def _check_rasa(self, acc_array, ref_path):
        """Check the RASA values computed with the ``acc_array`` table.

        The pregenerated DSSP output for 2BEG is attached to a freshly
        parsed model. Each residue's ``EXP_DSSP_RASA`` value is then compared
        with the matching line of ``ref_path``, which is read as one float
        per line in residue iteration order.
        """
        model = PDBParser().get_structure("example", "PDB/2BEG.pdb")[0]
        # Read the DSSP data into the pdb object:
        _ = DSSP(model, "PDB/2BEG.dssp", "dssp", acc_array, "DSSP")
        with open(ref_path) as fh_ref:
            ref_lines = fh_ref.readlines()
        residues = [res for chain in model for res in chain]
        # Index the reference lines rather than zip(): a reference file that
        # is too short must raise instead of silently ending the comparison.
        for i, res in enumerate(residues):
            rasa_ref = float(ref_lines[i].rstrip())
            rasa = float(res.xtra["EXP_DSSP_RASA"])
            self.assertAlmostEqual(rasa, rasa_ref)

    def test_DSSP_file(self):
        """Test parsing of pregenerated DSSP."""
        dssp, _ = make_dssp_dict("PDB/2BEG.dssp")
        self.assertEqual(len(dssp), 130)

    def test_DSSP_noheader_file(self):
        """Test parsing of pregenerated DSSP missing header information."""
        # New DSSP prints a line containing only whitespace and "."
        dssp, _ = make_dssp_dict("PDB/2BEG_noheader.dssp")
        self.assertEqual(len(dssp), 130)

    def test_DSSP_hbonds(self):
        """Test parsing of DSSP hydrogen bond information."""
        dssp, _ = make_dssp_dict("PDB/2BEG.dssp")
        dssp_indices = {v[5] for v in dssp.values()}
        hb_indices = set()
        # The integers preceding each hydrogen bond energy (kcal/mol) in the
        # "N-H-->O    O-->H-N    N-H-->O    O-->H-N" dssp output columns are
        # relative dssp indices. Therefore, "hb_indices" contains the absolute
        # dssp indices of residues participating in (provisional) h-bonds. Note
        # that actual h-bonds are typically determined by an energetic
        # threshold.
        for val in dssp.values():
            hb_indices |= {val[5] + x for x in (val[6], val[8], val[10], val[12])}
        # Check if all h-bond partner indices were successfully parsed.
        self.assertEqual((dssp_indices & hb_indices), hb_indices)

    def test_DSSP_in_model_obj(self):
        """All elements correctly added to xtra attribute of input model object."""
        model = PDBParser().get_structure("example", "PDB/2BEG.pdb")[0]
        # Read the DSSP data into the pdb object:
        _ = DSSP(model, "PDB/2BEG.dssp", "dssp", "Sander", "DSSP")
        # Now compare the xtra attribute of the pdb object
        # residue by residue with the pre-computed values:
        with open("PDB/dssp_xtra_Sander.txt") as fh_ref:
            ref_lines = fh_ref.readlines()
        residues = [res for chain in model for res in chain]
        for i, res in enumerate(residues):
            # The reference stores one residue per line as tab-separated text,
            # so both sides are converted to float where possible; comparing
            # numbers avoids depending on how a float is rendered as text.
            xtra_list_ref = [
                will_it_float(field) for field in ref_lines[i].rstrip().split("\t")
            ]
            # The xtra attribute is a dict; take its values in sorted-key order.
            xtra_list = [will_it_float(res.xtra[key]) for key in sorted(res.xtra)]
            self.assertEqual(xtra_list, xtra_list_ref)

    def test_DSSP_RSA(self):
        """Tests the usage of different ASA tables."""
        # Tests include Sander/default, Wilke, Miller and Ahmad
        for acc_array in ("Sander", "Wilke", "Miller", "Ahmad"):
            # subTest lets every table be checked even if an earlier one fails.
            with self.subTest(acc_array=acc_array):
                self._check_rasa(acc_array, f"PDB/{acc_array}_RASA.txt")
# Run the tests verbosely when this file is executed as a script.
if __name__ == "__main__":
    unittest.main(testRunner=unittest.TextTestRunner(verbosity=2))
|