"""Classes for providing extra information about an :class:`ihm.Entity`"""

import urllib.request


class Reference:
    """Common base for objects that attach extra information to an
       :class:`ihm.Entity`.

       Do not instantiate this class itself. Use one of its subclasses
       (for example :class:`Sequence` or :class:`UniProtSequence`) and
       hand the resulting objects to the :class:`ihm.Entity` constructor.
    """


class Sequence(Reference):
    """Describe where the sequence of an :class:`ihm.Entity` can be found
       in a sequence database. For common databases a convenience subclass
       exists, such as :class:`UniProtSequence`.

       Objects of this class are usually handed to the :class:`ihm.Entity`
       constructor.

       The correspondence between the database sequence and the entity
       sequence is described by :attr:`alignments`.

       :param str db_name: The name of the database.
       :param str db_code: The name of the sequence in the database.
       :param str accession: The database accession.
       :param str sequence: The complete sequence, as a string of
              one-letter codes.
       :param str details: Longer text describing the sequence.
    """

    def __init__(self, db_name, db_code, accession, sequence, details=None):
        self.db_name = db_name
        self.db_code = db_code
        self.accession = accession
        self.sequence = sequence
        self.details = details

        #: Every alignment between the reference and entity sequences, as
        #: :class:`Alignment` objects. When this list is left empty, a
        #: simple 1:1 alignment is assumed.
        self.alignments = []

    def _signature(self):
        """Return a tuple identifying this reference: the database fields
           followed by the signature of each alignment."""
        # "details" is deliberately left out of the signature
        db_fields = (self.db_name, self.db_code, self.accession,
                     self.sequence)
        return db_fields + tuple(aln._signature() for aln in self.alignments)

    def _get_alignments(self):
        """Return the user-provided alignments, or a one-element list
           holding a default :class:`Alignment` if none were given."""
        if self.alignments:
            return self.alignments
        # Build the default alignment on first use and keep it on the
        # object, so later calls hand back the very same instance
        try:
            fallback = self._default_alignment
        except AttributeError:
            fallback = self._default_alignment = Alignment()
        return [fallback]


class UniProtSequence(Sequence):
    """Point to the sequence of an :class:`ihm.Entity` in UniProt.

       These objects are typically passed to the :class:`ihm.Entity`
       constructor.

       :param str db_code: The UniProt name (e.g. NUP84_YEAST)
       :param str accession: The UniProt accession (e.g. P52891)

       See :class:`Sequence` for a description of the remaining parameters.
    """

    # Database name passed to the Sequence constructor for every instance
    _db_name = 'UNP'

    def __init__(self, db_code, accession, sequence, details=None):
        super().__init__(self._db_name, db_code, accession, sequence, details)

    def __str__(self):
        return "<ihm.reference.UniProtSequence(%r)>" % self.accession

    @classmethod
    def from_accession(cls, accession):
        """Create :class:`UniProtSequence` from just an accession.
           This is done by querying the UniProt web API, so requires network
           access.

           The first line of the returned FASTA file is expected to look
           like ``>sp|ACCESSION|NAME details...`` (or ``>tr|...``); NAME
           is used as the ``db_code`` and the rest of the line, if any,
           as ``details``. All following lines are joined to form the
           sequence.

           :param str accession: The UniProt accession (e.g. P52891)
           :return: A new object of this class.
           :raises ValueError: if the FASTA header line cannot be parsed.
        """
        # urlopen returns bytes
        def decode(t):
            return t.decode('ascii')
        url = 'https://www.uniprot.org/uniprot/%s.fasta' % accession
        with urllib.request.urlopen(url) as fh:
            header = decode(fh.readline())
            spl = header.split('|')
            # Third field holds "NAME details..."; an empty or
            # whitespace-only field yields no name at all, which is
            # reported as a parse failure rather than an IndexError
            cd = spl[2].split(None, 1) if len(spl) >= 3 else []
            if not cd or spl[0] not in ('>sp', '>tr'):
                raise ValueError("Cannot parse UniProt header %s" % header)
            code = cd[0]
            details = cd[1].rstrip('\r\n') if len(cd) > 1 else None
            # Drop both LF and CR so that a response using CRLF line
            # endings does not leave stray '\r' characters in the sequence
            seq = decode(fh.read()).replace('\r', '').replace('\n', '')
            return cls(code, accession, seq, details)


class Alignment:
    """A range of residues that matches between the database sequence
       (:class:`Sequence`) and the :class:`ihm.Entity` sequence.
       Both ranges must have the same length and the same primary
       sequence; any differences must be described with
       :class:`SeqDif` objects.

       :param int db_begin: The first residue in the database sequence
              that is used (defaults to the entire sequence).
       :param int db_end: The last residue in the database sequence
              that is used (or None, the default, to use the entire sequence).
       :param int entity_begin: The first residue in the :class:`~ihm.Entity`
              sequence that is taken from the reference (defaults to the entire
              entity sequence).
       :param int entity_end: The last residue in the :class:`~ihm.Entity`
              sequence that is taken from the reference (or None, the default,
              to use the entire sequence).
       :param seq_dif: Single-point mutations made to the sequence.
       :type seq_dif: Sequence of :class:`SeqDif` objects.
    """
    def __init__(self, db_begin=1, db_end=None, entity_begin=1,
                 entity_end=None, seq_dif=[]):
        self.db_begin = db_begin
        self.db_end = db_end
        self.entity_begin = entity_begin
        self.entity_end = entity_end
        # Store a fresh list, so that neither the caller's sequence nor
        # the shared default argument is modified through this object
        self.seq_dif = list(seq_dif)

    def _signature(self):
        """Return a tuple of the four range bounds followed by the
           signature of each sequence difference."""
        bounds = (self.db_begin, self.db_end,
                  self.entity_begin, self.entity_end)
        return bounds + tuple(dif._signature() for dif in self.seq_dif)


class SeqDif:
    """Describe a single difference between a reference sequence and an
       entity sequence. See :class:`Alignment`.

       :param int seq_id: The residue index in the entity sequence.
       :param db_monomer: The monomer type (as a :class:`~ihm.ChemComp` object)
              in the reference sequence.
       :type db_monomer: :class:`ihm.ChemComp`
       :param monomer: The monomer type (as a :class:`~ihm.ChemComp` object)
              in the entity sequence.
       :type monomer: :class:`ihm.ChemComp`
       :param str details: Descriptive text for the sequence difference.
    """
    def __init__(self, seq_id, db_monomer, monomer, details=None):
        self.seq_id = seq_id
        self.db_monomer = db_monomer
        self.monomer = monomer
        self.details = details

    def _signature(self):
        """Return a tuple of all four fields identifying this difference."""
        # Unlike other signatures, "details" is kept here, since it is
        # what distinguishes insertions from deletions
        signature = (self.seq_id, self.db_monomer, self.monomer,
                     self.details)
        return signature
