File: __init__.py

package info (click to toggle)
python-biopython 1.64%2Bdfsg-5
  • links: PTS, VCS
  • area: main
  • in suites: jessie, jessie-kfreebsd
  • size: 44,416 kB
  • ctags: 12,472
  • sloc: python: 153,759; xml: 67,286; ansic: 9,003; sql: 1,488; makefile: 144; sh: 59
file content (125 lines) | stat: -rw-r--r-- 3,776 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
# Copyright 2005 by Jonathan Taylor.
# All rights reserved.
# This code is part of the Biopython distribution and governed by its
# license.  Please see the LICENSE file that should have been included
# as part of this package.
"""This module deals with CAPS markers.

A CAPS marker is a location a DifferentialCutsite as described below and a
set of primers that can be used to visualize this.  More information can
be found in the paper located at:

http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Retrieve&db=PubMed&list_uids=8106085&dopt=Abstract

Copyright Jonathan Taylor 2005
"""


class DifferentialCutsite(object):
    """A differential cutsite is a location in an alignment where an enzyme cuts
    at least one sequence and also cannot cut at least one other sequence.

    Members:
    start       Where it lives in the alignment.
    enzyme      The enzyme that causes this.
    cuts_in     A list of sequences (as indexes into the alignment) the
                enzyme cuts in.
    blocked_in  A list of sequences (as indexes into the alignment) the
                enzyme is blocked in.

    """

    def __init__(self, **kwds):
        """Initialize a DifferentialCutsite.

        Each member (as listed in the class description) should be included as a
        keyword.
        """

        self.start = int(kwds["start"])
        self.enzyme = kwds["enzyme"]
        self.cuts_in = kwds["cuts_in"]
        self.blocked_in = kwds["blocked_in"]


class AlignmentHasDifferentLengthsError(Exception):
    pass


class CAPSMap(object):
    """A map of an alignment showing all possible dcuts.

    Members:
    alignment  The alignment that is mapped.
    dcuts      A list of possible CAPS markers in the form of
                         DifferentialCutsites.
    """

    def __init__(self, alignment, enzymes = []):
        """Initialize the CAPSMap

        Required:
        alignment    The alignment to be mapped.

        Optional:
        enzymes      The enzymes to be used to create the map.
        """

        self.sequences = [rec.seq for rec in alignment]
        self.size = len(self.sequences)
        self.length = len(self.sequences[0])
        for seq in self.sequences:
            if len(seq) != self.length:
                raise AlignmentHasDifferentLengthsError

        self.alignment = alignment
        self.enzymes = enzymes

        # look for dcuts
        self._digest()

    def _digest_with(self, enzyme):
        cuts = [] # list of lists, one per sequence
        all = []

        # go through each sequence
        for seq in self.sequences:
            # grab all the cuts in the sequence
            seq_cuts = [cut - enzyme.fst5 for cut in enzyme.search(seq)]
            # maintain a list of all cuts in all sequences
            all.extend(seq_cuts)
            cuts.append(seq_cuts)

        # we sort the all list and remove duplicates
        all.sort()

        last = -999
        new = []
        for cut in all:
            if cut != last:
                new.append(cut)
            last = cut
        all = new
        # all now has indices for all sequences in the alignment

        for cut in all:
            # test for dcuts

            cuts_in = []
            blocked_in = []

            for i in range(0, self.size):
                seq = self.sequences[i]
                if cut in cuts[i]:
                    cuts_in.append(i)
                else:
                    blocked_in.append(i)

            if cuts_in != [] and blocked_in != []:
                self.dcuts.append(DifferentialCutsite(start = cut, enzyme = enzyme, cuts_in = cuts_in, blocked_in = blocked_in))

    def _digest(self):
        self.dcuts = []

        for enzyme in self.enzymes:
            self._digest_with(enzyme)