File: pycompare.py

package info (click to toggle)
python-cogent 1.4.1-1.2
  • links: PTS, VCS
  • area: non-free
  • in suites: squeeze
  • size: 13,260 kB
  • ctags: 20,087
  • sloc: python: 116,163; ansic: 732; makefile: 74; sh: 9
file content (55 lines) | stat: -rw-r--r-- 2,032 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
#!/usr/bin/env python
# Very slow.  See compare.pyx

__author__ = "Peter Maxwell"
__copyright__ = "Copyright 2007-2009, The Cogent Project"
__credits__ = ["Peter Maxwell", "Gavin Huttley"]
__license__ = "GPL"
__version__ = "1.4.1"
__maintainer__ = "Gavin Huttley"
__email__ = "gavin.huttley@anu.edu.au"
__status__ = "Production"

def dotplot(seq1, seq2, window, threshold, min_gap_length=0, band=None, progress=False):
    """A list of line segments covering the window-mers with identical matches > threshold
    
    Gaps of size less than min_gap will be hidden, which saves on line segments.
    if 'band' is not None then it limits the searched area
    """
    
    result = []
    if band is None:
        band = max(len(seq1), len(seq2))
    for diagonal in range(-min(len(seq1), band), min(len(seq2), band)+1):
        d_segments = []
        was_high = False
        scores = [0] * window
        score = 0
        (i_lo, i_hi) = max(0, -diagonal), min(len(seq1), len(seq2)-diagonal)
        if progress:
            if diagonal %100 == 0:
                print diagonal, (i_lo, i_lo+diagonal), (i_hi, i_hi+diagonal), len(result)
        for i in range(i_lo, i_hi):
            j = i + diagonal
            k = i % window
            score -= scores[k]
            scores[k] = seq1[i] == seq2[j]
            score += scores[k]
            if score >= threshold:
                if not was_high:
                    start = max(i_lo, i - window)
                    was_high = True
            else:
                if was_high:
                    end = i - 1
                    if d_segments and start-d_segments[-1][1] < min_gap_length:
                        (start, jumped_end) = d_segments.pop()
                    d_segments.append((start, end))
                    was_high = False
        if was_high:
            end = i - 1
            d_segments.append((start, end))
        
        for (start, end) in d_segments:
            result.append(((start, start+diagonal), (end, end+diagonal)))
    return result