File: reference.py

package info (click to toggle)
python-ihm 2.7-1
  • links: PTS, VCS
  • area: main
  • in suites: sid
  • size: 3,368 kB
  • sloc: python: 30,422; ansic: 5,990; sh: 24; makefile: 20
file content (154 lines) | stat: -rw-r--r-- 6,331 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
"""Classes for providing extra information about an :class:`ihm.Entity`"""

import urllib.request


class Reference:
    """Common base for objects that annotate an :class:`ihm.Entity`.

       Do not instantiate this class directly. Use one of its subclasses,
       for example :class:`Sequence` or :class:`UniProtSequence`, and
       (typically) hand the resulting object to the :class:`ihm.Entity`
       constructor."""


class Sequence(Reference):
    """Reference to an :class:`ihm.Entity` sequence held in a sequence
       database. For commonly used databases, convenience subclasses such
       as :class:`UniProtSequence` are available.

       Objects of this class are usually given to the :class:`ihm.Entity`
       constructor.

       The mapping between database and entity sequences can be described
       with :attr:`alignments`.

       :param str db_name: The name of the database.
       :param str db_code: The name of the sequence in the database.
       :param str accession: The database accession.
       :param str sequence: The complete sequence, as a string of
              one-letter codes.
       :param str details: Longer text describing the sequence.
    """

    def __init__(self, db_name, db_code, accession, sequence, details=None):
        self.db_name = db_name
        self.db_code = db_code
        self.accession = accession
        self.sequence = sequence
        self.details = details

        #: List of :class:`Alignment` objects relating the reference
        #: sequence to the entity sequence. When left empty, a simple 1:1
        #: alignment is assumed.
        self.alignments = []

    def _signature(self):
        # "details" is deliberately left out of the signature
        identity = (self.db_name, self.db_code, self.accession,
                    self.sequence)
        return identity + tuple(aln._signature() for aln in self.alignments)

    def _get_alignments(self):
        # Explicitly provided alignments always take precedence
        if self.alignments:
            return self.alignments
        # Otherwise hand out a single default Alignment, created on first
        # use and cached on the object so later calls return the same one
        try:
            default = self._default_alignment
        except AttributeError:
            default = self._default_alignment = Alignment()
        return [default]


class UniProtSequence(Sequence):
    """Point to the sequence of an :class:`ihm.Entity` in UniProt.

       These objects are typically passed to the :class:`ihm.Entity`
       constructor.

       :param str db_code: The UniProt name (e.g. NUP84_YEAST)
       :param str accession: The UniProt accession (e.g. P52891)

       See :class:`Sequence` for a description of the remaining parameters.
    """

    # Database name passed to the Sequence constructor for every instance
    _db_name = 'UNP'

    def __init__(self, db_code, accession, sequence, details=None):
        super().__init__(self._db_name, db_code, accession, sequence, details)

    def __str__(self):
        return "<ihm.reference.UniProtSequence(%r)>" % self.accession

    @classmethod
    def from_accession(cls, accession):
        """Create :class:`UniProtSequence` from just an accession.
           This is done by querying the UniProt web API, so requires network
           access.

           The first line of the returned FASTA file is expected to look
           like ``>sp|ACCESSION|NAME description`` (or ``>tr|...``); NAME
           is used as the ``db_code`` and the description, if any, as
           ``details``. All remaining lines are joined to form the sequence.

           :param str accession: The UniProt accession (e.g. P52891)
           :return: A new object populated from the downloaded FASTA file.
           :raises ValueError: if the FASTA header cannot be parsed.
        """
        url = 'https://www.uniprot.org/uniprot/%s.fasta' % accession
        with urllib.request.urlopen(url) as fh:
            # urlopen returns bytes
            header = fh.readline().decode('ascii')
            body = fh.read().decode('ascii')

        # Split on the first two pipes only, so that a '|' character in
        # the free-text description does not truncate it
        fields = header.split('|', 2)
        if len(fields) < 3 or fields[0] not in ('>sp', '>tr'):
            raise ValueError("Cannot parse UniProt header %s" % header)
        name_and_description = fields[2].split(None, 1)
        if not name_and_description:
            # Third field is blank, so there is no entry name to use
            raise ValueError("Cannot parse UniProt header %s" % header)
        code = name_and_description[0]
        if len(name_and_description) > 1:
            details = name_and_description[1].rstrip('\r\n')
        else:
            details = None
        # Drop every kind of whitespace (including any '\r' from CRLF line
        # endings), not just '\n', when joining the sequence lines
        seq = ''.join(body.split())
        return cls(code, accession, seq, details)


class Alignment:
    """A sequence range that aligns between the database and the entity.
       This describes part of the sequence in the sequence database
       (:class:`Sequence`) and in the :class:`ihm.Entity`. The two ranges
       must be the same length and have the same primary sequence (any
       differences must be described with :class:`SeqDif` objects).

       :param int db_begin: The first residue in the database sequence
              that is used (defaults to the entire sequence).
       :param int db_end: The last residue in the database sequence
              that is used (or None, the default, to use the entire sequence).
       :param int entity_begin: The first residue in the :class:`~ihm.Entity`
              sequence that is taken from the reference (defaults to the entire
              entity sequence).
       :param int entity_end: The last residue in the :class:`~ihm.Entity`
              sequence that is taken from the reference (or None, the default,
              to use the entire sequence).
       :param seq_dif: Single-point mutations made to the sequence (or None,
              the default, if there are none). The objects are copied into
              a new list, so the passed-in sequence is never modified.
       :type seq_dif: Sequence of :class:`SeqDif` objects.
    """
    def __init__(self, db_begin=1, db_end=None, entity_begin=1,
                 entity_end=None, seq_dif=None):
        self.db_begin, self.db_end = db_begin, db_end
        self.entity_begin, self.entity_end = entity_begin, entity_end
        # Use None rather than a mutable list as the default, and always
        # store a private copy so instances never share a list with each
        # other or with the caller
        self.seq_dif = [] if seq_dif is None else list(seq_dif)

    def _signature(self):
        # The four range bounds followed by the signature of each SeqDif
        return ((self.db_begin, self.db_end, self.entity_begin,
                 self.entity_end)
                + tuple(s._signature() for s in self.seq_dif))


class SeqDif:
    """Describe a single difference between the reference sequence and the
       entity sequence. See :class:`Alignment`.

       :param int seq_id: The residue index in the entity sequence.
       :param db_monomer: The monomer type (as a :class:`~ihm.ChemComp` object)
              in the reference sequence.
       :type db_monomer: :class:`ihm.ChemComp`
       :param monomer: The monomer type (as a :class:`~ihm.ChemComp` object)
              in the entity sequence.
       :type monomer: :class:`ihm.ChemComp`
       :param str details: Descriptive text for the sequence difference.
    """
    def __init__(self, seq_id, db_monomer, monomer, details=None):
        self.seq_id = seq_id
        self.db_monomer = db_monomer
        self.monomer = monomer
        self.details = details

    def _signature(self):
        # "details" is kept in the signature (unlike for Sequence), since
        # it is what distinguishes insertions from deletions
        signature = (self.seq_id, self.db_monomer, self.monomer,
                     self.details)
        return signature