1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199
|
"""Classes to handle starting models."""
import enum
class SequenceIdentityDenominator(enum.IntEnum):
    """Choice of denominator for a sequence identity calculation.

    Any of these constants may be handed to :class:`SequenceIdentity`
    to record how a percentage identity was normalized.
    """

    #: The length of the shorter of the two sequences
    SHORTER_LENGTH = 1

    #: The count of aligned positions, gaps included
    NUM_ALIGNED_WITH_GAPS = 2

    #: The count of aligned residue pairs, gaps excluded
    NUM_ALIGNED_WITHOUT_GAPS = 3

    #: The arithmetic mean of the sequence lengths
    MEAN_LENGTH = 4

    #: Some other method that is not listed above
    OTHER = 5
class SequenceIdentity:
    """Sequence identity between a template and its target sequence.

    See :class:`Template`.

    :param value: Percentage sequence identity.
    :param denominator: How the identity was normalized -
           see :class:`SequenceIdentityDenominator`. Defaults to the
           length of the shorter sequence.
    """

    def __init__(self, value,
                 denominator=SequenceIdentityDenominator.SHORTER_LENGTH):
        # Both arguments are stored as given; no validation is done here.
        self.value, self.denominator = value, denominator
class Template:
    """A PDB file used as a comparative modeling template for part of a
    starting model.

    See :class:`StartingModel`.

    :param dataset: Pointer to where this template is stored.
    :type dataset: :class:`~ihm.dataset.Dataset`
    :param str asym_id: The author-provided asymmetric unit (chain) to use
           from the template dataset (not necessarily the same as the
           starting model's asym_id or the ID of the asym_unit in the
           final IHM model).
    :param tuple seq_id_range: The sequence range in the dataset that
           is modeled by this template. Note that this numbering may differ
           from the IHM numbering. See `offset` in :class:`StartingModel`.
    :param tuple template_seq_id_range: The sequence range of the template
           that is used in comparative modeling.
    :param sequence_identity: Sequence identity between template and
           the target sequence. A bare number (`float` or `int`) is taken
           to be a percentage and is wrapped in a :class:`SequenceIdentity`
           with the default denominator; any other object is stored
           unchanged.
    :type sequence_identity: :class:`SequenceIdentity` or `float`
    :param alignment_file: Reference to the external file containing the
           template-target alignment.
    :type alignment_file: :class:`~ihm.location.Location`
    """

    # todo: handle sequence_identity_denominator as an enum, not int
    def __init__(self, dataset, asym_id, seq_id_range, template_seq_id_range,
                 sequence_identity, alignment_file=None):
        self.dataset, self.asym_id = dataset, asym_id
        self.seq_id_range = seq_id_range
        self.template_seq_id_range = template_seq_id_range
        # Wrap bare numbers so that the attribute consistently exposes
        # .value and .denominator. Integers (e.g. 40) are accepted as well
        # as floats; bool is an int subclass but is not a meaningful
        # percentage, so it is passed through untouched like other objects.
        if (isinstance(sequence_identity, (int, float))
                and not isinstance(sequence_identity, bool)):
            sequence_identity = SequenceIdentity(sequence_identity)
        self.sequence_identity = sequence_identity
        self.alignment_file = alignment_file
class StartingModel:
    """An initial guess used when modeling an asymmetric unit.

    See :class:`ihm.representation.Segment` and
    :attr:`ihm.System.orphan_starting_models`.

    :param asym_unit: The asymmetric unit (or part of one) that this
           starting model represents.
    :type asym_unit: :class:`~ihm.AsymUnit` or :class:`~ihm.AsymUnitRange`
    :param dataset: Pointer to where this model is stored.
    :type dataset: :class:`~ihm.dataset.Dataset`
    :param str asym_id: The asymmetric unit (chain) to use from the
           starting model's dataset (not necessarily the same as the ID
           of the asym_unit in the final model).
    :param list templates: A list of :class:`Template` objects, if this
           is a comparative model.
    :param int offset: Offset between the residue numbering in the dataset
           and the IHM model (the offset is added to the starting model
           numbering to give the IHM model numbering).
    :param list metadata: List of PDB metadata, such as HELIX records.
    :param software: The software used to generate the starting model.
    :type software: :class:`~ihm.Software`
    :param script_file: Reference to the external file containing the
           script used to generate the starting model (usually a
           :class:`~ihm.location.WorkflowFileLocation`).
    :type script_file: :class:`~ihm.location.Location`
    :param str description: Additional text describing the starting model.
    """

    def __init__(self, asym_unit, dataset, asym_id, templates=None, offset=0,
                 metadata=None, software=None, script_file=None,
                 description=None):
        self.asym_unit = asym_unit
        self.dataset = dataset
        self.asym_id = asym_id
        self.offset = offset
        # A fresh list is made per instance when the caller supplies None;
        # a caller-provided list is kept as-is (not copied).
        self.templates = [] if templates is None else templates
        self.metadata = [] if metadata is None else metadata
        self.software = software
        self.script_file = script_file
        self.description = description
        # Backing stores for add_atom() / add_seq_dif()
        self._atoms, self._seq_difs = [], []

    def get_atoms(self):
        """Yield :class:`~ihm.model.Atom` objects that represent this
        starting model.

        This lets the starting model coordinates be embedded in the
        mmCIF file, which is useful if the starting model is not available
        elsewhere (or it has been modified).

        The default implementation returns the internal list populated
        by :meth:`add_atom`; it is usually necessary to subclass and
        override this method. See :meth:`ihm.model.Model.get_spheres`
        for more details.

        Note that the returned atoms should be those used in modeling,
        not those stored in the file. In particular, the numbering scheme
        should be that used in the IHM model (add `offset` to the dataset
        numbering). If any residues were changed (for example it is common
        to mutate MSE in the dataset to MET in the modeling) the final
        mutated name should be used (MET in this case) and
        :meth:`get_seq_dif` overridden to note the change.
        """
        atoms = self._atoms
        return atoms

    def get_seq_dif(self):
        """Yield :class:`SeqDif` objects for any sequence changes between
        the dataset and the starting model. See :meth:`get_atoms`.

        The default implementation returns the internal list populated
        by :meth:`add_seq_dif`; it is usually necessary to subclass and
        override this method.

        Note that this is always called *after* :meth:`get_atoms`.
        """
        seq_difs = self._seq_difs
        return seq_difs

    def add_atom(self, atom):
        """Append an :class:`~ihm.model.Atom` object to this model's
        collection of atoms. See :meth:`get_atoms` for more details.
        """
        self._atoms += [atom]

    def add_seq_dif(self, seq_dif):
        """Append a :class:`SeqDif` object to this model's collection
        of sequence differences. See :meth:`get_atoms` for more details.
        """
        self._seq_difs += [seq_dif]
class PDBHelix:
    """Hold the fields of a single HELIX record from a PDB file.

    :param str line: The text of the HELIX record. Fields are read from
           fixed character positions, so the line must be long enough to
           include the helix length field (characters 71 to 75, counting
           from zero).
    """

    def __init__(self, line):
        def text(first, last):
            # Fixed-width text field with surrounding padding removed
            return line[first:last].strip()

        def number(first, last):
            # Fixed-width integer field; int() tolerates the padding
            return int(line[first:last])

        self.helix_id = text(11, 14)
        # Residue name, chain and residue number at the start of the helix
        self.start_resnam = text(14, 18)
        self.start_asym = line[19]
        self.start_resnum = number(21, 25)
        # Residue name, chain and residue number at the end of the helix
        self.end_resnam = text(27, 30)
        self.end_asym = line[31]
        self.end_resnum = number(33, 37)
        self.helix_class = number(38, 40)
        self.length = number(71, 76)
class SeqDif:
    """Record one sequence difference between a dataset and a starting
    model.

    See :meth:`StartingModel.get_seq_dif` and :class:`MSESeqDif`.

    :param int db_seq_id: The residue index in the dataset.
    :param int seq_id: The residue index in the starting model. This
           should normally be `db_seq_id + offset`.
    :param str db_comp_id: The name of the residue in the dataset.
    :param str details: Descriptive text for the sequence difference.
    """

    def __init__(self, db_seq_id, seq_id, db_comp_id, details=None):
        self.db_seq_id = db_seq_id
        self.seq_id = seq_id
        self.db_comp_id = db_comp_id
        self.details = details
class MSESeqDif:
    """Record that a residue was mutated from MSE to MET.

    The dataset residue name is always ``MSE``. See :class:`SeqDif` for
    a description of the parameters.
    """

    def __init__(self, db_seq_id, seq_id,
                 details="Conversion of modified residue MSE to MET"):
        self.db_seq_id = db_seq_id
        self.seq_id = seq_id
        # The dataset residue name is fixed for this kind of difference
        self.db_comp_id = 'MSE'
        self.details = details
|