1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166
|
from obitools3.dms.obiseq import Nuc_Seq_Stored, Nuc_Seq
from functools import reduce
import struct
from copy import deepcopy
class GappedPositionException(Exception):
pass
class AlignedSequence(Nuc_Seq): # TODO discuss. can be both Nuc_Seq or Nuc_Seq_Stored....
def __init__(self, seq) :
self.wrapped = seq
self._length=len(seq)
self._gaps=[[self._length,0]]
def clone(self):
seq = AlignedSequence(self.wrapped)
seq._gaps=deepcopy(self._gaps)
seq._length=reduce(lambda x,y:x+y, (z[0]+z[1] for z in self._gaps),0)
return seq
def setGaps(self, value):
'''
Set gap vector to an AlignedSequence.
Gap vector describes the gap positions on a sequence.
It is a gap of couple. The first couple member is the count
of sequence letter, the second one is the gap length.
@param value: a list of length 2 list describing gap positions
@type value: list of couple
'''
assert isinstance(value, list),'Gap vector must be a list'
assert reduce(lambda x,y: x and y,
(isinstance(z, list) and len(z)==2 for z in value),
True),"Value must be a list of length 2 list"
lseq = reduce(lambda x,y:x+y, (z[0] for z in value),0)
assert lseq==len(self.wrapped),"Gap vector incompatible with the sequence"
self._gaps = value
self._length=reduce(lambda x,y:x+y, (z[0]+z[1] for z in value),0)
def getGaps(self):
return tuple(self._gaps)
gaps = property(getGaps, setGaps, None, "Gaps's Docstring")
def _getIndice(self,pos):
i=0
cpos=0
for s,g in self._gaps:
cpos+=s
if cpos>pos:
return i,pos-cpos+s
cpos+=g
if cpos>pos:
return i,-pos+cpos-g-1
i+=1
raise IndexError
def getId(self):
d = self.id or ("%s_ALN" % self.wrapped.id)
return d
def __len__(self):
return self._length
def get_str(self):
return ''.join([x.decode('ascii') for x in self])
def __iter__(self):
def isymb():
cpos=0
for s,g in self._gaps:
for x in range(s):
yield (self.wrapped[cpos+x])
for x in range(g):
yield b"-"
cpos+=s
return isymb()
def _posInWrapped(self,position):
i,s=self._getIndice(position)
if s<0:
raise GappedPositionException
value=self._gaps
p=reduce(lambda x,y:x+y, (z[0] for z in value[:i]),0)+s
return p
def get_symbol_at(self,position):
try:
return self.wrapped.get_symbol_at(self._posInWrapped(position))
except GappedPositionException:
return b"-"
def insertGap(self,position,count=1):
if position==self._length:
idx=len(self._gaps)-1
p=-1
else:
idx,p = self._getIndice(position)
if p >= 0:
self._gaps.insert(idx, [p,count])
self._gaps[idx+1][0]-=p
else:
self._gaps[idx][1]+=count
self._length=reduce(lambda x,y:x+y, (z[0]+z[1] for z in self._gaps),0)
class Alignment(list):
def _assertData(self,data):
# assert isinstance(data, Nuc_Seq_Stored),'You must only add bioseq to an alignement' TODO
if hasattr(self, '_alignlen'):
assert self._alignlen==len(data),'All aligned sequences must have the same length'
else:
self._alignlen=len(data)
return data
def clone(self):
ali = Alignment(x.clone() for x in self)
return ali
def append(self,data):
data = self._assertData(data)
list.append(self,data)
def __setitem__(self,index,data):
data = self._assertData(data)
list.__setitem__(self,index,data)
def getSite(self,key):
if isinstance(key,int):
return [x[key] for x in self]
def insertGap(self,position,count=1):
for s in self:
s.insertGap(position,count)
def isFullGapSite(self,key):
return reduce(lambda x,y: x and y,(z==b"-" for z in self.getSite(key)),True)
def isGappedSite(self,key):
return b"-" in self.getSite(key)
def __str__(self):
l = len(self[0])
rep=""
idmax = max(len(x.id) for x in self)+2
template= "%%-%ds %%-60s" % idmax
for p in range(0,l,60):
for s in self:
rep+= (template % (s.id,s[p:p+60])).strip() + '\n'
rep+="\n"
return rep
def columnIterator(alignment):
lali = len(alignment[0])
for p in range(lali):
c = []
for x in alignment:
c.append(x[p])
yield c
|