1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125
|
# Copyright 2005 by Jonathan Taylor.
# All rights reserved.
# This code is part of the Biopython distribution and governed by its
# license. Please see the LICENSE file that should have been included
# as part of this package.
"""This module deals with CAPS markers.
A CAPS marker is a location a DifferentialCutsite as described below and a
set of primers that can be used to visualize this. More information can
be found in the paper located at:
http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Retrieve&db=PubMed&list_uids=8106085&dopt=Abstract
Copyright Jonathan Taylor 2005
"""
class DifferentialCutsite(object):
"""A differential cutsite is a location in an alignment where an enzyme cuts
at least one sequence and also cannot cut at least one other sequence.
Members:
start Where it lives in the alignment.
enzyme The enzyme that causes this.
cuts_in A list of sequences (as indexes into the alignment) the
enzyme cuts in.
blocked_in A list of sequences (as indexes into the alignment) the
enzyme is blocked in.
"""
def __init__(self, **kwds):
"""Initialize a DifferentialCutsite.
Each member (as listed in the class description) should be included as a
keyword.
"""
self.start = int(kwds["start"])
self.enzyme = kwds["enzyme"]
self.cuts_in = kwds["cuts_in"]
self.blocked_in = kwds["blocked_in"]
class AlignmentHasDifferentLengthsError(Exception):
pass
class CAPSMap(object):
"""A map of an alignment showing all possible dcuts.
Members:
alignment The alignment that is mapped.
dcuts A list of possible CAPS markers in the form of
DifferentialCutsites.
"""
def __init__(self, alignment, enzymes = []):
"""Initialize the CAPSMap
Required:
alignment The alignment to be mapped.
Optional:
enzymes The enzymes to be used to create the map.
"""
self.sequences = [rec.seq for rec in alignment]
self.size = len(self.sequences)
self.length = len(self.sequences[0])
for seq in self.sequences:
if len(seq) != self.length:
raise AlignmentHasDifferentLengthsError
self.alignment = alignment
self.enzymes = enzymes
# look for dcuts
self._digest()
def _digest_with(self, enzyme):
cuts = [] # list of lists, one per sequence
all = []
# go through each sequence
for seq in self.sequences:
# grab all the cuts in the sequence
seq_cuts = [cut - enzyme.fst5 for cut in enzyme.search(seq)]
# maintain a list of all cuts in all sequences
all.extend(seq_cuts)
cuts.append(seq_cuts)
# we sort the all list and remove duplicates
all.sort()
last = -999
new = []
for cut in all:
if cut != last:
new.append(cut)
last = cut
all = new
# all now has indices for all sequences in the alignment
for cut in all:
# test for dcuts
cuts_in = []
blocked_in = []
for i in range(0, self.size):
seq = self.sequences[i]
if cut in cuts[i]:
cuts_in.append(i)
else:
blocked_in.append(i)
if cuts_in != [] and blocked_in != []:
self.dcuts.append(DifferentialCutsite(start = cut, enzyme = enzyme, cuts_in = cuts_in, blocked_in = blocked_in))
def _digest(self):
self.dcuts = []
for enzyme in self.enzymes:
self._digest_with(enzyme)
|