File: dataset.py

package info (click to toggle)
python-ihm 2.7-1
  • links: PTS, VCS
  • area: main
  • in suites: sid
  • size: 3,368 kB
  • sloc: python: 30,422; ansic: 5,990; sh: 24; makefile: 20
file content (236 lines) | stat: -rw-r--r-- 7,551 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
# coding=utf-8

"""Classes for handling experimental datasets used by mmCIF models.
"""


class Dataset:
    """A set of input data, for example, a crystal structure or EM map.

       Unless ``_allow_duplicates`` is set, two datasets compare equal (and
       hash identically) when they are instances of the same class and hold
       the same locations. Comparison with an object that is not a dataset
       is simply unequal.

       :param location: a pointer to where the
              dataset is stored. This is usually a subclass of
              :class:`~ihm.location.DatabaseLocation` if the dataset is
              deposited in a database such as PDB or EMDB, or
              :class:`~ihm.location.InputFileLocation` if the dataset is stored
              in an external file.
       :type location: :class:`ihm.location.Location`
       :param str details: Text giving more information about the dataset.
    """

    # Names of the instance attributes that take part in equality/hashing
    _eq_keys = ['_locations']
    # If True, an instance only ever compares equal to itself
    _allow_duplicates = False

    def _eq_vals(self):
        """Return the hashable key used by ``__eq__`` and ``__hash__``.

           This is the object's id if duplicates are allowed (so that every
           instance is distinct), otherwise a tuple of the class followed by
           the value of each attribute named in ``_eq_keys``.
        """
        if self._allow_duplicates:
            return id(self)
        return (self.__class__,) + tuple(getattr(self, key)
                                         for key in self._eq_keys)

    def __eq__(self, other):
        # Objects that cannot provide comparison values (e.g. None, numbers)
        # are not datasets; defer to Python's default handling, which treats
        # them as unequal, rather than failing with AttributeError.
        other_vals = getattr(other, '_eq_vals', None)
        if other_vals is None:
            return NotImplemented
        return self._eq_vals() == other_vals()

    def __hash__(self):
        # Must stay consistent with __eq__, so hash the same key
        return hash(self._eq_vals())

    #: Type label for this kind of dataset; each subclass provides its own
    data_type = 'Other'

    def __init__(self, location, details=None):
        # The dictionary actually allows for multiple locations for a given
        # dataset. Support this via a private attribute so we can at least
        # handle reading existing files. 'location' just references the
        # first location in this list.
        self._locations = ()

        # Assigning to the 'location' property replaces _locations with a
        # one-element tuple
        self.location = location
        self.details = details

        #: A list of :class:`Dataset` and/or :class:`TransformedDataset`
        #: objects from which this one was derived.
        #: For example, a 3D EM map may be derived from a set of 2D images.
        self.parents = []

    def _add_location(self, loc):
        """Record another location for this dataset.

           If no location has been set yet (it is None), `loc` becomes the
           primary location; otherwise it is appended to the list of
           locations.
        """
        if self.location is None:
            self.location = loc
        else:
            self._locations += (loc,)

    def __get_location(self):
        # The primary location is always the first one stored
        return self._locations[0]

    def __set_location(self, val):
        # Setting the primary location discards any additional locations
        self._locations = (val, )

    location = property(__get_location, __set_location,
                        doc="A pointer to where the dataset is stored")

    def add_primary(self, dataset):
        """Add another Dataset from which this one was ultimately derived,
           i.e. it is added as a parent, unless a parent already exists,
           in which case it is added as a grandparent, and so on.

           :param dataset: The dataset to add at the root of the chain
                  of parents.
           :raises ValueError: if any dataset in the chain has more than
                   one parent, since the chain to extend is then ambiguous.
        """
        root = self
        while root.parents:
            if len(root.parents) > 1:
                raise ValueError("This dataset has multiple parents - don't "
                                 "know which one to add to")
            root = root.parents[0]
            # A parent may be a wrapper (such as TransformedDataset) that
            # has no 'parents' of its own; continue the walk from the
            # dataset that it wraps.
            while not hasattr(root, 'parents'):
                root = root.dataset
        root.parents.append(dataset)


class TransformedDataset:
    """A :class:`Dataset` together with the rotation/translation that must
       be applied before it is used.

       The usual use is as an entry in :attr:`Dataset.parents`, for a derived
       dataset that was moved relative to its parent (for example, to better
       align with the model's reference frame or other experimental data).
       The transformation stored here is the one that places the derived
       dataset on the parent.

       :param dataset: The (parent) dataset.
       :type dataset: :class:`Dataset`
       :param transform: The rotation and translation that places a
              derived dataset on this dataset.
       :type transform: :class:`ihm.geometry.Transformation`
    """
    def __init__(self, dataset, transform):
        # Both arguments are stored as given; no validation is done here
        self.dataset = dataset
        self.transform = transform


class DatasetGroup(list):
    """A collection of :class:`Dataset` objects that are handled together.
       The group is a plain list, with a few descriptive attributes added.

       :param sequence elements: Initial set of datasets.
       :param str name: Short text name of this group.
       :param str application: Text that shows how this group is used.
       :param str details: Longer text that describes this group.

       Normally a group is passed to one or more
       :class:`~ihm.protocol.Protocol` or :class:`~ihm.analysis.Analysis`
       objects, although unused groups can still be included in the file
       if desired by adding them to :attr:`ihm.System.orphan_dataset_groups`.
    """

    # Class-level fallbacks, kept for backwards compatibility with earlier
    # versions of this class, which didn't specify name/application/details
    name = None
    application = None
    details = None

    def __init__(self, elements=(), name=None, application=None, details=None):
        # Populate the underlying list first, then attach the metadata
        super().__init__(elements)
        self.name = name
        self.application = application
        self.details = details


class CXMSDataset(Dataset):
    """Processed cross-links from a CX-MS (crosslinking-MS) experiment.

       This :class:`Dataset` subclass only overrides ``data_type``.
    """
    data_type = 'Crosslinking-MS data'


class MassSpecDataset(Dataset):
    """Raw mass spectrometry files such as peaklists.

       This :class:`Dataset` subclass only overrides ``data_type``.
    """
    data_type = 'Mass Spectrometry data'


class HDXDataset(Dataset):
    """Data from a hydrogen/deuterium (H/D) exchange experiment.

       This :class:`Dataset` subclass only overrides ``data_type``.
    """
    data_type = 'H/D exchange data'


class PDBDataset(Dataset):
    """An experimentally-determined 3D structure as a set of coordinates,
       usually in a PDB or mmCIF file.

       This :class:`Dataset` subclass only overrides ``data_type``.
    """
    data_type = 'Experimental model'


class ComparativeModelDataset(Dataset):
    """A 3D structure determined by comparative modeling.

       This :class:`Dataset` subclass only overrides ``data_type``.
    """
    data_type = 'Comparative model'


class IntegrativeModelDataset(Dataset):
    """A 3D structure determined by integrative modeling.

       This :class:`Dataset` subclass only overrides ``data_type``.
    """
    data_type = 'Integrative model'


class DeNovoModelDataset(Dataset):
    """A 3D structure determined by de novo modeling.

       This :class:`Dataset` subclass only overrides ``data_type``.
    """
    data_type = 'De Novo model'


class NMRDataset(Dataset):
    """A nuclear magnetic resonance (NMR) dataset.

       This :class:`Dataset` subclass only overrides ``data_type``.
    """
    data_type = 'NMR data'


class MutagenesisDataset(Dataset):
    """Mutagenesis data.

       This :class:`Dataset` subclass only overrides ``data_type``.
    """
    data_type = 'Mutagenesis data'


class EMDensityDataset(Dataset):
    """A 3D electron microscopy (3DEM) volume dataset.

       This :class:`Dataset` subclass only overrides ``data_type``.
    """
    data_type = '3DEM volume'


class EMMicrographsDataset(Dataset):
    """Raw 2D electron micrographs.

       This :class:`Dataset` subclass only overrides ``data_type``.
    """
    data_type = 'EM raw micrographs'


class EM2DClassDataset(Dataset):
    """A 2DEM class average.

       This :class:`Dataset` subclass only overrides ``data_type``.
    """
    data_type = '2DEM class average'


class SASDataset(Dataset):
    """SAS data.

       This :class:`Dataset` subclass only overrides ``data_type``.
    """
    data_type = 'SAS data'


class FRETDataset(Dataset):
    """Single molecule data from a Förster resonance energy transfer
       (FRET) experiment.

       This :class:`Dataset` subclass only overrides ``data_type``.
    """
    data_type = 'Single molecule FRET data'


class EnsembleFRETDataset(Dataset):
    """Ensemble data from a Förster resonance energy transfer
       (FRET) experiment.

       This :class:`Dataset` subclass only overrides ``data_type``.
    """
    data_type = 'Ensemble FRET data'


class YeastTwoHybridDataset(Dataset):
    """Yeast two-hybrid screening data.

       This :class:`Dataset` subclass only overrides ``data_type``.
    """
    data_type = 'Yeast two-hybrid screening data'


class GeneticInteractionsDataset(Dataset):
    """Quantitative measurements of genetic interactions.

       This :class:`Dataset` subclass only overrides ``data_type``.
    """
    data_type = 'Quantitative measurements of genetic interactions'


class EPRDataset(Dataset):
    """Electron paramagnetic resonance (EPR) data.

       This :class:`Dataset` subclass only overrides ``data_type``.
    """
    data_type = 'EPR data'


class XRayDiffractionDataset(Dataset):
    """Data from X-ray diffraction.

       This :class:`Dataset` subclass only overrides ``data_type``.
    """
    data_type = 'X-ray diffraction data'


class HydroxylRadicalFootprintingDataset(Dataset):
    """Data from hydroxyl radical footprinting.

       This :class:`Dataset` subclass only overrides ``data_type``.
    """
    data_type = 'Hydroxyl radical footprinting data'


class DNAFootprintingDataset(Dataset):
    """Data from DNA footprinting.

       This :class:`Dataset` subclass only overrides ``data_type``.
    """
    data_type = 'DNA footprinting data'


class PredictedContactsDataset(Dataset):
    """A collection of predicted contacts.

       This :class:`Dataset` subclass only overrides ``data_type``.
    """
    data_type = 'Predicted contacts'