File: startmodel.py

package info (click to toggle)
python-ihm 2.7-1
  • links: PTS, VCS
  • area: main
  • in suites: sid
  • size: 3,368 kB
  • sloc: python: 30,422; ansic: 5,990; sh: 24; makefile: 20
file content (199 lines) | stat: -rw-r--r-- 8,482 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
"""Classes to handle starting models."""

import enum


class SequenceIdentityDenominator(enum.IntEnum):
    """Choices for the denominator of a sequence identity calculation.

       Pass one of these members as the `denominator` argument of
       :class:`SequenceIdentity`. Since this is an :class:`enum.IntEnum`,
       each member also compares equal to its integer value."""

    #: The length of the shorter of the two sequences
    SHORTER_LENGTH = 1

    #: The count of aligned positions, gaps included
    NUM_ALIGNED_WITH_GAPS = 2

    #: The count of aligned residue pairs, gaps excluded
    NUM_ALIGNED_WITHOUT_GAPS = 3

    #: The arithmetic mean of the sequence lengths
    MEAN_LENGTH = 4

    #: Some other method that is not listed above
    OTHER = 5


class SequenceIdentity:
    """Sequence identity between a template and its target sequence.
       Used by :class:`Template`.

       :param value: The sequence identity, as a percentage.
       :param denominator: How the sequence identity was calculated; one
              of the :class:`SequenceIdentityDenominator` constants
              (the shorter sequence length is used if not given).
    """
    def __init__(self, value,
                 denominator=SequenceIdentityDenominator.SHORTER_LENGTH):
        # Plain value holder; both arguments are stored unmodified
        self.value, self.denominator = value, denominator


class Template:
    """A PDB file used as a comparative modeling template for part of a
       starting model.

       See :class:`StartingModel`.

       :param dataset: Pointer to where this template is stored.
       :type dataset: :class:`~ihm.dataset.Dataset`
       :param str asym_id: The author-provided asymmetric unit (chain) to use
              from the template dataset (not necessarily the same as the
              starting model's asym_id or the ID of the asym_unit in the
              final IHM model).
       :param tuple seq_id_range: The sequence range in the dataset that
              is modeled by this template. Note that this numbering may differ
              from the IHM numbering. See `offset` in :class:`StartingModel`.
       :param tuple template_seq_id_range: The sequence range of the template
              that is used in comparative modeling.
       :param sequence_identity: Sequence identity between template and
              the target sequence. A plain number (`float` or `int`) is
              wrapped in a :class:`SequenceIdentity` that uses the default
              denominator; any other object is stored unchanged.
       :type sequence_identity: :class:`SequenceIdentity` or `float`
       :param alignment_file: Reference to the external file containing the
              template-target alignment.
       :type alignment_file: :class:`~ihm.location.Location`
    """
    # todo: handle sequence_identity_denominator as an enum, not int

    def __init__(self, dataset, asym_id, seq_id_range, template_seq_id_range,
                 sequence_identity, alignment_file=None):
        self.dataset, self.asym_id = dataset, asym_id
        self.seq_id_range = seq_id_range
        self.template_seq_id_range = template_seq_id_range
        # Wrap bare numbers so that the attribute consistently offers
        # .value and .denominator. Integers (e.g. 100) are accepted as well
        # as floats; bool is an int subclass but is not a percentage, so
        # it is passed through untouched like any other object.
        if (isinstance(sequence_identity, (int, float))
                and not isinstance(sequence_identity, bool)):
            sequence_identity = SequenceIdentity(sequence_identity)
        self.sequence_identity = sequence_identity
        self.alignment_file = alignment_file


class StartingModel:
    """An initial structure used to begin modeling of an asymmetric unit.

       See :class:`ihm.representation.Segment` and
       :attr:`ihm.System.orphan_starting_models`.

       :param asym_unit: The asymmetric unit, or a range within one, that
              this starting model covers.
       :type asym_unit: :class:`~ihm.AsymUnit` or :class:`~ihm.AsymUnitRange`
       :param dataset: Pointer to where this model is stored.
       :type dataset: :class:`~ihm.dataset.Dataset`
       :param str asym_id: The asymmetric unit (chain) to read from the
              starting model's dataset; this need not match the ID of the
              asym_unit in the final model.
       :param list templates: A list of :class:`Template` objects, if this
              is a comparative model. A new empty list is used if omitted.
       :param int offset: Value added to the residue numbering of the
              starting model (dataset) to obtain the IHM model numbering.
       :param list metadata: List of PDB metadata, such as HELIX records.
              A new empty list is used if omitted.
       :param software: The software used to generate the starting model.
       :type software: :class:`~ihm.Software`
       :param script_file: Reference to the external file containing the
              script used to generate the starting model (usually a
              :class:`~ihm.location.WorkflowFileLocation`).
       :type script_file: :class:`~ihm.location.Location`
       :param str description: Additional text describing the starting model.
    """
    def __init__(self, asym_unit, dataset, asym_id, templates=None, offset=0,
                 metadata=None, software=None, script_file=None,
                 description=None):
        # None stands for "not given"; each instance then gets its own list
        self.templates = [] if templates is None else templates
        self.metadata = [] if metadata is None else metadata
        self.asym_unit = asym_unit
        self.dataset = dataset
        self.asym_id = asym_id
        self.offset = offset
        self.software = software
        self.script_file = script_file
        self.description = description
        # Storage behind the default get_atoms() and get_seq_dif()
        self._atoms = []
        self._seq_difs = []

    def get_atoms(self):
        """Provide the :class:`~ihm.model.Atom` objects that make up this
           starting model, so that its coordinates can be embedded in the
           mmCIF file. This is useful when the starting model cannot be
           obtained elsewhere, or has been modified.

           This default implementation simply returns the internal list
           filled by :meth:`add_atom`; usually a subclass should override
           this method instead.
           See :meth:`ihm.model.Model.get_spheres` for more details.

           The atoms should be those used in modeling rather than those
           stored in the file. In particular they should use the IHM model
           numbering (dataset numbering plus `offset`). Where residues
           were changed (for example, MSE in the dataset is commonly mutated
           to MET for modeling) the final mutated name (MET in this
           example) should be used, and :meth:`get_seq_dif` overridden to
           record the change.
        """
        return self._atoms

    def get_seq_dif(self):
        """Provide :class:`SeqDif` objects describing each sequence change
           between the dataset and the starting model.
           See :meth:`get_atoms`.

           This default implementation simply returns the internal list
           filled by :meth:`add_seq_dif`; usually a subclass should override
           this method instead.

           Note that this is always called *after* :meth:`get_atoms`.
        """
        return self._seq_difs

    def add_atom(self, atom):
        """Append one :class:`~ihm.model.Atom` to the list returned by
           the default :meth:`get_atoms`.

           See :meth:`get_atoms` for more details.
        """
        self._atoms.append(atom)

    def add_seq_dif(self, seq_dif):
        """Append one :class:`SeqDif` to the list returned by the
           default :meth:`get_seq_dif`.

           See :meth:`get_atoms` for more details.
        """
        self._seq_difs.append(seq_dif)


class PDBHelix:
    """Hold the fields parsed from one HELIX record of a PDB file.

       :param str line: The HELIX record. Fields are read from fixed
              character positions, so the line must be long enough to
              contain them.
    """
    def __init__(self, line):
        # All positions below are zero-based and end-exclusive

        def text(begin, end):
            """Return line[begin:end] without surrounding whitespace."""
            return line[begin:end].strip()

        def number(begin, end):
            """Return line[begin:end] converted to an integer."""
            return int(line[begin:end])

        self.helix_id = text(11, 14)
        # First residue of the helix: name, chain, number
        self.start_resnam = text(14, 18)
        self.start_asym = line[19]
        self.start_resnum = number(21, 25)
        # Last residue of the helix: name, chain, number
        self.end_resnam = text(27, 30)
        self.end_asym = line[31]
        self.end_resnum = number(33, 37)
        self.helix_class = number(38, 40)
        self.length = number(71, 76)


class SeqDif:
    """Annotate a sequence difference between a dataset and starting model.
       See :meth:`StartingModel.get_seq_dif` and :class:`MSESeqDif`.

       :param int db_seq_id: The residue index in the dataset.
       :param int seq_id: The residue index in the starting model. This should
              normally be `db_seq_id + offset`.
       :param str db_comp_id: The name of the residue in the dataset.
       :param str details: Descriptive text for the sequence difference.
    """
    def __init__(self, db_seq_id, seq_id, db_comp_id, details=None):
        self.db_seq_id, self.seq_id = db_seq_id, seq_id
        self.db_comp_id, self.details = db_comp_id, details


class MSESeqDif(SeqDif):
    """Denote that a residue was mutated from MSE to MET.

       This is a :class:`SeqDif` whose `db_comp_id` is always ``'MSE'``, so
       it can be used (and type-checked) anywhere a :class:`SeqDif` is
       expected.

       :param int db_seq_id: The residue index in the dataset.
       :param int seq_id: The residue index in the starting model. This should
              normally be `db_seq_id + offset`.
       :param str details: Descriptive text for the sequence difference.
    """
    def __init__(self, db_seq_id, seq_id,
                 details="Conversion of modified residue MSE to MET"):
        # Delegate to SeqDif rather than duplicating its attribute setup
        super().__init__(db_seq_id, seq_id, 'MSE', details)