File: _dynamic.pyx

package info (click to toggle)
obitools 3.0.1~b26%2Bdfsg-4
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 26,756 kB
  • sloc: ansic: 24,299; python: 657; sh: 27; makefile: 21
file content (361 lines) | stat: -rwxr-xr-x 9,675 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
#cython: language_level=3

'''
Created on 14 sept. 2009

@author: coissac
'''

from obitools3.libalign import AlignedSequence
from obitools3.libalign import Alignment
from copy import deepcopy


######
#
# Import standard memory management function to improve
# efficiency of the alignment code
#
#

    
cdef AlignMatrix* allocateMatrix(int hsize, int vsize,AlignMatrix *matrix=NULL):
    
    vsize+=1
    hsize+=1
    
    if matrix is NULL:
        matrix = <AlignMatrix*>malloc(sizeof(AlignMatrix))
        matrix.vsize=0
        matrix.hsize=0
        matrix.msize=0
        matrix.matrix=NULL
        matrix.bestVJump=NULL
        matrix.bestHJump=NULL
        
    if hsize > matrix.hsize:
        matrix.bestVJump = <int*>realloc(matrix.bestVJump,hsize * sizeof(int))
        matrix.hsize=hsize
        
    if vsize > matrix.vsize:
        matrix.bestHJump = <int*>realloc(matrix.bestHJump,vsize * sizeof(int))
        matrix.vsize=vsize
        
    if (hsize * vsize) > matrix.msize:
        matrix.msize = hsize * vsize
        matrix.matrix = <AlignCell*>realloc(matrix.matrix, matrix.msize * sizeof(AlignCell))
        
    return matrix

cdef void freeMatrix(AlignMatrix* matrix):
    if matrix is not NULL:
        if matrix.matrix is not NULL:
            free(matrix.matrix)
        if matrix.bestVJump is not NULL:
            free(matrix.bestVJump)
        if matrix.bestHJump is not NULL:
            free(matrix.bestHJump)
        free(matrix)
        
cdef void resetMatrix(AlignMatrix* matrix):
    if matrix is not NULL:
        if matrix.matrix is not NULL:
            bzero(<void*>matrix.matrix, matrix.msize * sizeof(AlignCell))
        if matrix.bestHJump is not NULL:
            memset(<void*>matrix.bestHJump,255,matrix.vsize * sizeof(int))
        if matrix.bestVJump is not NULL:
            memset(<void*>matrix.bestVJump,255,matrix.hsize * sizeof(int))
     
    
cdef alignSequence* allocateSequence(object bioseq, alignSequence* seq=NULL) except *:
    cdef bytes strseq
    cdef int i
    
    if seq is NULL:
        seq = <alignSequence*>malloc(sizeof(alignSequence))
        seq.length=0
        seq.buffsize=0
        seq.sequence=NULL
        seq.quality=NULL
        seq.hasQuality=False
    
    seq.length=len(bioseq)
    if seq.length > seq.buffsize:
        #seq.sequence = <char*>realloc(seq.sequence,sizeof(char)*seq.length)
        #seq.quality  = <double*>realloc(seq.quality,sizeof(double)*seq.length)
        seq.buffsize = seq.length
    
    # TODO optimisable...
    #strseq = bioseq.seq.lower()
    #memcpy(seq.sequence,<char*>strseq,seq.length)
    seq.sequence = bioseq.seq

    return seq

cdef void freeSequence(alignSequence* seq):
    if seq is not NULL:
        #if seq.sequence is not NULL:
        #    free(<void*>seq.sequence)
        #if seq.quality is not NULL:
        #    free(<void*>seq.quality)
        free(seq)
        
cdef alignPath* allocatePath(long l1,long l2,alignPath* path=NULL):
    cdef long length=l1+l2
    
    if path is NULL:
        path = <alignPath*>malloc(sizeof(alignPath))
        path.length=0
        path.buffsize=0
        path.path=NULL
        
    if length > path.buffsize:
        path.buffsize=length
        path.path=<long*>realloc(path.path,sizeof(long)*length)
        
    path.length=0
    path.vStart=0
    path.hStart=0
    
    return path

cdef void reversePath(alignPath* path):
        cdef long i
        cdef long j
        
        j=path.length
        for i in range(<long>(path.length/2)):  # TODO not sure about the cast
            j-=1
            path.path[i],path.path[j]=path.path[j],path.path[i]

cdef void freePath(alignPath* path):
    if path is not NULL:
        if path.path is not NULL:
            free(<void*>path.path)
        free(<void*>path)
  
    
cdef int aascii = ord(b'a')
cdef int _basecode[26]
   
cdef int bitCount(int x):
    cdef int i=0
    while(x):
        i+=1
        x&=x-1
    return i

cpdef bint iupacMatch(unsigned char a, unsigned char b):
    cdef bint m 

    if a==42:    # * ascii code
        a=110    # n ascii code

    if b==42:    # * ascii code
        b=110    # n ascii code

    m = _basecode[a - aascii] & _basecode[b - aascii]
    return m

cpdef unsigned char encodeBase(unsigned char lettre):
    return _basecode[lettre - aascii]

cpdef double iupacPartialMatch(unsigned char a, unsigned char b):
    cdef int codeA
    cdef int codeB
    cdef int good
    cdef int all
    cdef double partial 
        
    if a==42:    # * ascii code
        a=110    # n ascii code

    if b==42:    # * ascii code
        b=110    # n ascii code

    codeA =  _basecode[a - aascii]
    codeB =  _basecode[b - aascii]
    good  =  bitCount(codeA & codeB)
    all   =  bitCount(codeA)  * bitCount(codeB)
    partial= <double>good / all 

    return partial
    
                
cdef class DynamicProgramming:

    def __init__(self,opengap,extgap):
        self.sequenceChanged=True
        self.scoreChanged=True

        self.matrix=NULL
        self.hSeq=NULL
        self.vSeq=NULL
        self.path=NULL
        
        self.horizontalSeq=None
        self.verticalSeq=None
        
        self._opengap=opengap
        self._extgap=extgap
        
    cdef int _vlen(self):
        return self.vSeq.length
        
    cdef int _hlen(self):
        return self.hSeq.length
        
    cdef int allocate(self) except -1:
        
        assert self.horizontalSeq is not None,'Sequence A must be set'
        assert self.verticalSeq is not None,'Sequence B must be set'
        
        cdef long lenH=self._hlen()
        cdef long lenV=self._vlen()

        self.matrix=allocateMatrix(lenH,lenV,self.matrix)
        return 0


    cdef double doAlignment(self) except? 0:
        pass
    
    cdef bint _needToCompute(self):
        return self.scoreChanged or self.sequenceChanged
    
    cdef void backtrack(self):
        pass
    
    property seqA:
            def __get__(self):
                return self.horizontalSeq
            
            def __set__(self, seq):
                self.sequenceChanged = True
                self.horizontalSeq = seq
                self.hSeq = allocateSequence(self.horizontalSeq, self.hSeq)
                
    property seqB:
            def __get__(self):
                return self.verticalSeq
            
            def __set__(self, seq):
                self.sequenceChanged = True
                self.verticalSeq = seq
                self.vSeq = allocateSequence(self.verticalSeq, self.vSeq)
                
    property opengap:
        def __get__(self):
            return self._opengap
        
        def __set__(self,opengap):
            self._opengap=opengap 
            self.scoreChanged=True
            
    property extgap:
        def __get__(self):
            return self._extgap
        
        def __set__(self,extgap):
            self._extgap=extgap 
            self.scoreChanged=True
            
    property needToCompute:
        def __get__(self):
            return self.scoreChanged or self.sequenceChanged

    property score:
        def __get__(self):
            return self.doAlignment()
                    
    cdef void reset(self):
        self.scoreChanged=True
        resetMatrix(self.matrix)
        
    cdef inline int index(self, int x, int y):
        return (self._hlen()+1) * y + x
    
    cdef void clean(self):
        freeMatrix(self.matrix)
        freeSequence(self.hSeq)
        freeSequence(self.vSeq)
        freePath(self.path)
        
    def __dealloc__(self):
        self.clean()
        
    def __call__(self):
        cdef list hgaps=[]
        cdef list vgaps=[]
        cdef list b
        cdef int  hp=0
        cdef int  vp=0
        cdef int  lenh=0
        cdef int  lenv=0
        cdef int  h,v,p
        cdef int  i
        cdef object ali
        cdef double score
                
        if self._needToCompute():
            score = self.doAlignment()
            self.backtrack()
            for i in range(self.path.length-1,-1,-1):
                p=self.path.path[i]
                if p==0:
                    hp+=1
                    vp+=1
                    lenh+=1
                    lenv+=1
                elif p>0:
                    hp+=p
                    lenh+=p
                    vgaps.append([vp,p])
                    vp=0
                else:
                    vp-=p
                    lenv-=p
                    hgaps.append([hp,-p])
                    hp=0
                
            if hp:
                hgaps.append([hp,0])
            if vp:
                vgaps.append([vp,0])
                
            if lenh < self._hlen():
                hseq=self.horizontalSeq[self.path.hStart:self.path.hStart+lenh]
            else:
                hseq=self.horizontalSeq
            
            hseq=AlignedSequence(hseq) 
            hseq.gaps=hgaps       
            
            if lenv < self._vlen():
                vseq=self.verticalSeq[self.path.vStart:self.path.vStart+lenv]
            else:
                vseq=self.verticalSeq
    
            vseq=AlignedSequence(vseq) 
            vseq.gaps=vgaps       
            
            ali=Alignment()
            ali.append(hseq)
            ali.append(vseq)
            
            ali.score=score
            self.alignment=ali
        ali=self.alignment.clone()
        ali.score=self.alignment.score
        return ali
  
        
            
            
# initialize iupac carray

__basecode=[1,14,2,13,0,0,4,11,0,0,12,0,3,15,0,0,0,5,6,8,8,7,9,0,10,0]            
for i in range(26):
    _basecode[i]=__basecode[i]