File: __init__.py

package info (click to toggle)
obitools 3.0.1~b26%2Bdfsg-5
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 26,788 kB
  • sloc: ansic: 24,299; python: 657; sh: 27; makefile: 20
file content (166 lines) | stat: -rwxr-xr-x 4,988 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
from obitools3.dms.obiseq import Nuc_Seq_Stored, Nuc_Seq
from functools import reduce
import struct 
from copy import deepcopy


class GappedPositionException(Exception):
    pass

class AlignedSequence(Nuc_Seq):   # TODO discuss. can be both Nuc_Seq or Nuc_Seq_Stored....

    def __init__(self, seq) :
        self.wrapped = seq
        self._length=len(seq)
        self._gaps=[[self._length,0]]
        
    def clone(self):
        seq = AlignedSequence(self.wrapped)
        seq._gaps=deepcopy(self._gaps)
        seq._length=reduce(lambda x,y:x+y, (z[0]+z[1] for z in self._gaps),0)
        return seq

    def setGaps(self, value):
        '''
        Set gap vector to an AlignedSequence.
        
        Gap vector describes the gap positions on a sequence.
        It is a gap of couple. The first couple member is the count
        of sequence letter, the second one is the gap length. 
        @param value: a list of length 2 list describing gap positions
        @type value: list of couple
        '''
        assert isinstance(value, list),'Gap vector must be a list'
        assert reduce(lambda x,y: x and y,
                      (isinstance(z, list) and len(z)==2 for z in value),
                      True),"Value must be a list of length 2 list"
                       
        lseq = reduce(lambda x,y:x+y, (z[0] for z in value),0)
        assert lseq==len(self.wrapped),"Gap vector incompatible with the sequence"
        self._gaps = value
        self._length=reduce(lambda x,y:x+y, (z[0]+z[1] for z in value),0)

    def getGaps(self):
        return tuple(self._gaps)
    gaps = property(getGaps, setGaps, None, "Gaps's Docstring")

    def _getIndice(self,pos):
        i=0
        cpos=0
        for s,g in self._gaps:
            cpos+=s
            if cpos>pos:
                return i,pos-cpos+s
            cpos+=g 
            if cpos>pos:
                return i,-pos+cpos-g-1
            i+=1
        raise IndexError

    def getId(self):
        d = self.id or ("%s_ALN" % self.wrapped.id)
        return d

    def __len__(self):
        return self._length
    
    def get_str(self):
        return ''.join([x.decode('ascii') for x in self])
    
    def __iter__(self):
        def isymb():
            cpos=0
            for s,g in self._gaps:
                for x in range(s):
                    yield (self.wrapped[cpos+x])
                for x in range(g):
                    yield b"-"
                cpos+=s
        return isymb()
    
    def _posInWrapped(self,position):
        i,s=self._getIndice(position)
        if s<0:
            raise GappedPositionException
        value=self._gaps
        p=reduce(lambda x,y:x+y, (z[0] for z in value[:i]),0)+s
        return p

    def get_symbol_at(self,position):
        try:
            return self.wrapped.get_symbol_at(self._posInWrapped(position))
        except GappedPositionException:
            return b"-"
        
    def insertGap(self,position,count=1):
        if position==self._length:
            idx=len(self._gaps)-1
            p=-1
        else:
            idx,p = self._getIndice(position)

        if p >= 0:
            self._gaps.insert(idx, [p,count])
            self._gaps[idx+1][0]-=p
        else:
            self._gaps[idx][1]+=count
        self._length=reduce(lambda x,y:x+y, (z[0]+z[1] for z in self._gaps),0)
    

class Alignment(list):

    def _assertData(self,data):
#        assert isinstance(data, Nuc_Seq_Stored),'You must only add bioseq to an alignement' TODO
        if hasattr(self, '_alignlen'):
            assert self._alignlen==len(data),'All aligned sequences must have the same length'
        else:
            self._alignlen=len(data)
        return data  
            
    def clone(self):
        ali = Alignment(x.clone() for x in self)
        return ali
    
    def append(self,data):
        data = self._assertData(data)
        list.append(self,data)
        
    def __setitem__(self,index,data):
        
        data = self._assertData(data)
        list.__setitem__(self,index,data)
        
    def getSite(self,key):
        if isinstance(key,int):
            return [x[key] for x in self]
        
    def insertGap(self,position,count=1):
        for s in self:
            s.insertGap(position,count)
        
    def isFullGapSite(self,key):
        return reduce(lambda x,y: x and y,(z==b"-" for z in self.getSite(key)),True)
    
    def isGappedSite(self,key):
        return b"-" in self.getSite(key)

    def __str__(self):
        l = len(self[0])
        rep=""
        idmax = max(len(x.id) for x in self)+2
        template= "%%-%ds  %%-60s" % idmax
        for p in range(0,l,60):
            for s in self:
                rep+= (template % (s.id,s[p:p+60])).strip() + '\n'
            rep+="\n"
        return rep


def columnIterator(alignment):
    lali = len(alignment[0])
    for p in range(lali):
        c = []
        for x in alignment:
            c.append(x[p])
        yield c