File: __init__.py

package info (click to toggle)
python-biopython 1.68%2Bdfsg-3~bpo8%2B1
  • links: PTS, VCS
  • area: main
  • in suites: jessie-backports
  • size: 46,856 kB
  • sloc: python: 160,306; xml: 93,216; ansic: 9,118; sql: 1,208; makefile: 155; sh: 63
file content (130 lines) | stat: -rw-r--r-- 4,043 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
# Copyright 2005 by Jonathan Taylor.
# All rights reserved.
# This code is part of the Biopython distribution and governed by its
# license.  Please see the LICENSE file that should have been included
# as part of this package.
"""This module deals with CAPS markers.

A CAPS marker is a location a DifferentialCutsite as described below and a
set of primers that can be used to visualize this.  More information can
be found in the paper `Konieczny and Ausubel (1993)`_ (PMID 8106085).

.. _`Konieczny and Ausubel (1993)`: http://dx.doi.org/10.1046/j.1365-313X.1993.04020403.x
"""


class DifferentialCutsite(object):
    """Differential enzyme cutsite in an alignment.

    A differential cutsite is a location in an alignment where an enzyme cuts
    at least one sequence and also cannot cut at least one other sequence.

    Members:
     - start - Where it lives in the alignment.
     - enzyme - The enzyme that causes this.
     - cuts_in - A list of sequences (as indexes into the alignment) the
       enzyme cuts in.
     - blocked_in - A list of sequences (as indexes into the alignment) the
       enzyme is blocked in.

    """

    def __init__(self, **kwds):
        """Initialize a DifferentialCutsite.

        Each member (as listed in the class description) should be included as a
        keyword.
        """

        self.start = int(kwds["start"])
        self.enzyme = kwds["enzyme"]
        self.cuts_in = kwds["cuts_in"]
        self.blocked_in = kwds["blocked_in"]


class AlignmentHasDifferentLengthsError(Exception):
    pass


class CAPSMap(object):
    """A map of an alignment showing all possible dcuts.

    Members:
     - alignment - The alignment that is mapped.
     - dcuts - A list of possible CAPS markers in the form of
       DifferentialCutsites.
    """

    def __init__(self, alignment, enzymes=None):
        """Initialize the CAPSMap.

        Required:
         - alignment - The alignment to be mapped.

        Optional:
         - enzymes - List of enzymes to be used to create the map.
           Defaults to an empty list.
        """
        if enzymes is None:
            enzymes = []
        self.sequences = [rec.seq for rec in alignment]
        self.size = len(self.sequences)
        self.length = len(self.sequences[0])
        for seq in self.sequences:
            if len(seq) != self.length:
                raise AlignmentHasDifferentLengthsError

        self.alignment = alignment
        self.enzymes = enzymes

        # look for dcuts
        self._digest()

    def _digest_with(self, enzyme):
        cuts = []  # list of lists, one per sequence
        all = []

        # go through each sequence
        for seq in self.sequences:
            # grab all the cuts in the sequence
            seq_cuts = [cut - enzyme.fst5 for cut in enzyme.search(seq)]
            # maintain a list of all cuts in all sequences
            all.extend(seq_cuts)
            cuts.append(seq_cuts)

        # we sort the all list and remove duplicates
        all.sort()

        last = -999
        new = []
        for cut in all:
            if cut != last:
                new.append(cut)
            last = cut
        all = new
        # all now has indices for all sequences in the alignment

        for cut in all:
            # test for dcuts

            cuts_in = []
            blocked_in = []

            for i in range(0, self.size):
                seq = self.sequences[i]
                if cut in cuts[i]:
                    cuts_in.append(i)
                else:
                    blocked_in.append(i)

            if cuts_in != [] and blocked_in != []:
                self.dcuts.append(DifferentialCutsite(start=cut,
                                                      enzyme=enzyme,
                                                      cuts_in=cuts_in,
                                                      blocked_in=blocked_in))

    def _digest(self):
        self.dcuts = []

        for enzyme in self.enzymes:
            self._digest_with(enzyme)