File: _compare.pyx

package info (click to toggle)
python-cogent 1.5.3-2
  • links: PTS, VCS
  • area: main
  • in suites: jessie, jessie-kfreebsd
  • size: 16,424 kB
  • ctags: 24,343
  • sloc: python: 134,200; makefile: 100; ansic: 17; sh: 10
file content (62 lines) | stat: -rw-r--r-- 1,706 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
"""50x speedup for dotplots, but sequences must be strings and scoring is based on identity only
"""

version_info = (1, 3)
__version__ = "('1', '5', '3')"

cdef int cmax(int a, int b):
    if a > b:
        return a
    else:
        return b
            
cdef int cmin(int a, int b):
    if a < b:
        return a
    else:
        return b
            
def segments_from_diagonal(
        char seq1[],
        char seq2[],
        int window,
        int threshold,
        int min_gap_length,
        int diagonal):
    """List of ((x1,y1), (x2,y2)) for diagonal line segments scoring >= threshold/window"""
        
    cdef int was_high, score, i, i_lo, i_hi, j, k, start, prior_end
    cdef int len1, len2
    cdef int scores[100]   # yuk - use malloc?
    assert window < 100
    len1 = len(seq1)
    len2 = len(seq2)
    result = []
    was_high = 0
    for i from 0 <= i < window:
        scores[i] = 0
    score = 0
    i_lo = cmax(0, 0-diagonal)
    i_hi = cmin(len1, len2-diagonal)
    prior_end = 0
    for i from i_lo <=  i < i_hi:
        j = i + diagonal
        k = i % window
        score -= scores[k]
        scores[k] = (seq1[i] == seq2[j])
        score += scores[k]
        if score >= threshold:
            if not was_high:
                start = cmax(i_lo, i - window)
                if min_gap_length and prior_end:
                    if start < prior_end + min_gap_length:
                        (start, jumped_end) = result.pop()
                was_high = 1
        else:
            if was_high:
                result.append((start, i))
                prior_end = i
                was_high = 0
    if was_high:
        result.append((start, i_hi))
    return result