1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154
|
"""Classes for providing extra information about an :class:`ihm.Entity`"""
import urllib.request
class Reference:
    """Common base for objects that attach extra information to
       an :class:`ihm.Entity`.

       Do not instantiate this class itself. Pick a concrete subclass
       (for example :class:`Sequence` or :class:`UniProtSequence`) and
       hand the resulting object to the :class:`ihm.Entity` constructor.
    """
class Sequence(Reference):
    """Reference to an entry in a sequence database that provides the
       sequence of an :class:`ihm.Entity`. For widely used databases,
       convenience subclasses such as :class:`UniProtSequence` exist.

       These objects are typically passed to the :class:`ihm.Entity`
       constructor.

       See also :attr:`alignments` to describe the correspondence between
       the database and entity sequences.

       :param str db_name: The name of the database.
       :param str db_code: The name of the sequence in the database.
       :param str accession: The database accession.
       :param str sequence: The complete sequence, as a string of
              one-letter codes.
       :param str details: Longer text describing the sequence.
    """

    def __init__(self, db_name, db_code, accession, sequence, details=None):
        self.db_name = db_name
        self.db_code = db_code
        self.accession = accession
        self.sequence = sequence
        self.details = details

        #: All alignments between the reference and entity sequences, as
        #: :class:`Alignment` objects. When this list is left empty, a
        #: simple 1:1 alignment is assumed.
        self.alignments = []

    def _signature(self):
        """Return a tuple identifying this reference and its alignments."""
        # "details" is deliberately not part of the signature
        own_fields = (self.db_name, self.db_code, self.accession,
                      self.sequence)
        aligned = tuple(aln._signature() for aln in self.alignments)
        return own_fields + aligned

    def _get_alignments(self):
        """Return the user-provided alignments, or a one-element list
           holding a default :class:`Alignment` if none were provided."""
        if self.alignments:
            return self.alignments
        # Build the default alignment once and reuse the same object on
        # every later call
        if not hasattr(self, '_default_alignment'):
            self._default_alignment = Alignment()
        return [self._default_alignment]
class UniProtSequence(Sequence):
    """Point to the sequence of an :class:`ihm.Entity` in UniProt.

       These objects are typically passed to the :class:`ihm.Entity`
       constructor.

       :param str db_code: The UniProt name (e.g. NUP84_YEAST)
       :param str accession: The UniProt accession (e.g. P52891)

       See :class:`Sequence` for a description of the remaining parameters.
    """
    _db_name = 'UNP'

    def __init__(self, db_code, accession, sequence, details=None):
        super().__init__(self._db_name, db_code, accession, sequence, details)

    def __str__(self):
        return "<ihm.reference.UniProtSequence(%r)>" % self.accession

    @classmethod
    def from_accession(cls, accession):
        """Create :class:`UniProtSequence` from just an accession.

           This is done by querying the UniProt web API, so requires network
           access. The FASTA record for the accession is downloaded; its
           header line provides the name and description, and the
           remaining lines provide the sequence.

           :param str accession: The UniProt accession (e.g. P52891)
           :return: A new object of this class.
           :raises ValueError: if the FASTA header line cannot be parsed.
        """
        # urlopen returns bytes
        def decode(t):
            return t.decode('ascii')

        url = 'https://www.uniprot.org/uniprot/%s.fasta' % accession
        with urllib.request.urlopen(url) as fh:
            header = decode(fh.readline())
            # The header is expected to look like
            # ">sp|ACCESSION|NAME description"; only the ">sp" and ">tr"
            # prefixes are accepted.
            spl = header.split('|')
            cd = spl[2].split(None, 1) if len(spl) >= 3 else []
            # An empty name field is reported as a parse failure too,
            # rather than surfacing as an IndexError below
            if not cd or spl[0] not in ('>sp', '>tr'):
                raise ValueError("Cannot parse UniProt header %s" % header)
            code = cd[0]
            details = cd[1].rstrip('\r\n') if len(cd) > 1 else None
            # Remove all whitespace, not just '\n', so that CRLF line
            # endings do not leave stray '\r' characters in the sequence
            seq = ''.join(decode(fh.read()).split())
        return cls(code, accession, seq, details)
class Alignment:
    """A sequence range that aligns between the database and the entity.

       This describes part of the sequence in the sequence database
       (:class:`Sequence`) and in the :class:`ihm.Entity`. The two ranges
       must be the same length and have the same primary sequence (any
       differences must be described with :class:`SeqDif` objects).

       :param int db_begin: The first residue in the database sequence
              that is used (defaults to the entire sequence).
       :param int db_end: The last residue in the database sequence
              that is used (or None, the default, to use the entire sequence).
       :param int entity_begin: The first residue in the :class:`~ihm.Entity`
              sequence that is taken from the reference (defaults to the entire
              entity sequence).
       :param int entity_end: The last residue in the :class:`~ihm.Entity`
              sequence that is taken from the reference (or None, the default,
              to use the entire sequence).
       :param seq_dif: Single-point mutations made to the sequence
              (or None, the default, for no mutations). The items are
              copied into a new list, so the caller's container is never
              modified.
       :type seq_dif: Sequence of :class:`SeqDif` objects.
    """

    def __init__(self, db_begin=1, db_end=None, entity_begin=1,
                 entity_end=None, seq_dif=None):
        self.db_begin, self.db_end = db_begin, db_end
        self.entity_begin, self.entity_end = entity_begin, entity_end
        # A None sentinel avoids a mutable list living in the function
        # defaults; each instance always gets its own fresh list.
        self.seq_dif = [] if seq_dif is None else list(seq_dif)

    def _signature(self):
        """Return a tuple identifying this alignment, made of the four
           range bounds followed by the signature of each sequence
           difference."""
        return ((self.db_begin, self.db_end, self.entity_begin,
                 self.entity_end)
                + tuple(s._signature() for s in self.seq_dif))
class SeqDif:
    """Describe a single point where the entity sequence differs from
       the reference sequence. See :class:`Alignment`.

       :param int seq_id: The residue index in the entity sequence.
       :param db_monomer: The monomer type (as a :class:`~ihm.ChemComp` object)
              in the reference sequence.
       :type db_monomer: :class:`ihm.ChemComp`
       :param monomer: The monomer type (as a :class:`~ihm.ChemComp` object)
              in the entity sequence.
       :type monomer: :class:`ihm.ChemComp`
       :param str details: Descriptive text for the sequence difference.
    """

    def __init__(self, seq_id, db_monomer, monomer, details=None):
        self.seq_id = seq_id
        self.db_monomer = db_monomer
        self.monomer = monomer
        self.details = details

    def _signature(self):
        """Return a tuple identifying this sequence difference."""
        # "details" stays in the signature on purpose: it is what
        # distinguishes insertions from deletions
        field_names = ('seq_id', 'db_monomer', 'monomer', 'details')
        return tuple(getattr(self, name) for name in field_names)
|